added whitespace=collapse support. tests work. code needs review

This commit is contained in:
Lee Thomason (grinliz) 2012-08-20 22:00:38 -07:00
parent 77d7f206f6
commit bc1bfb7f27
3 changed files with 90 additions and 13 deletions

View File

@ -23,10 +23,12 @@ distribution.
#include "tinyxml2.h"
#include <cstdio>
#include <cstdlib>
#include <new>
#include <new> // Yes, this one new-style header is available in the Android SDK.
#ifdef ANDROID_NDK
#include <stddef.h>
#else
#include <cstddef>
#endif
using namespace tinyxml2;
@ -156,6 +158,31 @@ char* StrPair::ParseName( char* p )
}
// Collapses whitespace in the string, in place:
//   - leading whitespace is skipped by advancing 'start',
//   - every interior run of whitespace is replaced by a single ' ',
//   - trailing whitespace is dropped.
// The result is never longer than the input, so the write pointer can never
// overtake the read pointer and the rewrite is safe within the same buffer.
//
// NOTE(review): 'start' itself is moved forward here. If the buffer can be
// owned by this StrPair (NEEDS_DELETE), confirm that the code releasing it
// does not rely on 'start' still pointing at the beginning of the allocation.
void StrPair::CollapseWhitespace()
{
    // Test for a missing buffer *before* scanning it. SkipWhiteSpace()
    // dereferences its argument, so checking 'start' afterwards is too late.
    if ( !start ) {
        return;
    }

    // Trim leading space.
    start = XMLUtil::SkipWhiteSpace( start );

    if ( *start ) {
        const char* p = start;  // the read pointer
        char* q = start;        // the write pointer

        while( *p ) {
            if ( XMLUtil::IsWhiteSpace( *p )) {
                // Swallow the whole run of whitespace...
                p = XMLUtil::SkipWhiteSpace( p );
                if ( *p == 0 ) {
                    break;      // don't write to q; this trims the trailing space.
                }
                // ...and emit exactly one space in its place.
                *q = ' ';
                ++q;
            }
            *q = *p;
            ++q;
            ++p;
        }
        *q = 0;
    }
}
const char* StrPair::GetStr()
{
@ -232,6 +259,11 @@ const char* StrPair::GetStr()
}
*q = 0;
}
// The loop below has plenty going on, and this
// is a less useful mode. Break it out.
if ( flags & COLLAPSE_WHITESPACE ) {
CollapseWhitespace();
}
flags = (flags & NEEDS_DELETE);
}
return start;
@ -815,7 +847,11 @@ char* XMLText::ParseDeep( char* p, StrPair* )
return p;
}
else {
p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );
int flags = document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES;
if ( document->WhitespaceMode() == COLLAPSE_WHITESPACE )
flags |= StrPair::COLLAPSE_WHITESPACE;
p = value.ParseText( p, "<", flags );
if ( !p ) {
document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );
}
@ -1416,11 +1452,12 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const
// --------- XMLDocument ----------- //
XMLDocument::XMLDocument( bool _processEntities ) :
XMLDocument::XMLDocument( bool _processEntities, Whitespace _whitespace ) :
XMLNode( 0 ),
writeBOM( false ),
processEntities( _processEntities ),
errorID( 0 ),
whitespace( _whitespace ),
errorStr1( 0 ),
errorStr2( 0 ),
charBuffer( 0 )

View File

@ -24,11 +24,21 @@ distribution.
#ifndef TINYXML2_INCLUDED
#define TINYXML2_INCLUDED
#ifdef ANDROID_NDK
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#else
#include <cctype>
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <cstdarg>
#endif
/*
TODO: intern strings instead of allocation.
@ -112,6 +122,7 @@ public:
enum {
NEEDS_ENTITY_PROCESSING = 0x01,
NEEDS_NEWLINE_NORMALIZATION = 0x02,
COLLAPSE_WHITESPACE = 0x04,
TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
@ -140,6 +151,7 @@ public:
private:
void Reset();
void CollapseWhitespace();
enum {
NEEDS_FLUSH = 0x100,
@ -365,6 +377,7 @@ public:
// correct, but simple, and usually works.
// Returns a pointer to the first character at or after 'p' that is not
// whitespace. A character stops the scan if IsUTF8Continuation() is true
// for it or if isspace() rejects it (which includes the terminating null).
static const char* SkipWhiteSpace( const char* p )	{
    for ( ;; ++p ) {
        const unsigned char ch = static_cast<unsigned char>( *p );
        if ( IsUTF8Continuation( *p ) || !isspace( ch ) ) {
            return p;
        }
    }
}
// Mutable-pointer overload: returns a pointer to the first character at or
// after 'p' that is not whitespace. A character stops the scan if
// IsUTF8Continuation() is true for it or if isspace() rejects it.
static char* SkipWhiteSpace( char* p )	{
    for ( ;; ++p ) {
        const unsigned char ch = static_cast<unsigned char>( *p );
        if ( IsUTF8Continuation( *p ) || !isspace( ch ) ) {
            return p;
        }
    }
}
// Returns true if 'p' counts as whitespace: IsUTF8Continuation() must be
// false for it and isspace() must accept it. The value is converted to
// unsigned char before being handed to isspace().
static bool IsWhiteSpace( char p )	{
    if ( IsUTF8Continuation( p ) ) {
        return false;
    }
    return isspace( static_cast<unsigned char>( p ) ) != 0;
}
inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX ) {
int n = 0;
@ -1031,6 +1044,12 @@ private:
};
/// Whitespace handling mode for a document, chosen at construction time.
enum Whitespace {
    /// No whitespace collapsing is requested (the XMLDocument constructor's default).
    PRESERVE_WHITESPACE = 0,
    /// Text is parsed with StrPair::COLLAPSE_WHITESPACE: leading and trailing
    /// whitespace is removed and each interior run becomes a single space.
    COLLAPSE_WHITESPACE = 1
};
/** A Document binds together all the functionality.
It can be saved, loaded, and printed to the screen.
All Nodes are connected and allocated to a Document.
@ -1041,7 +1060,7 @@ class XMLDocument : public XMLNode
friend class XMLElement;
public:
/// constructor
XMLDocument( bool processEntities = true );
XMLDocument( bool processEntities = true, Whitespace = PRESERVE_WHITESPACE );
~XMLDocument();
/// Returns this object itself, since it already is an XMLDocument.
virtual XMLDocument* ToDocument() { return this; }
@ -1087,6 +1106,7 @@ public:
int SaveFile( FILE* );
/// Returns the document's processEntities flag (initialized from the
/// constructor argument). Text parsing uses it to pick between
/// StrPair::TEXT_ELEMENT and StrPair::TEXT_ELEMENT_LEAVE_ENTITIES.
bool ProcessEntities() const { return processEntities; }
/// Returns the document's whitespace mode (initialized from the constructor
/// argument). When it is COLLAPSE_WHITESPACE, text is parsed with the
/// StrPair::COLLAPSE_WHITESPACE flag added.
Whitespace WhitespaceMode() const { return whitespace; }
/**
Returns true if this document has a leading Byte Order Mark of UTF8.
@ -1189,6 +1209,7 @@ private:
bool writeBOM;
bool processEntities;
int errorID;
Whitespace whitespace;
const char* errorStr1;
const char* errorStr2;
char* charBuffer;

View File

@ -938,6 +938,25 @@ int main( int /*argc*/, const char ** /*argv*/ )
XMLTest( "QueryBoolText", boolValue, true, false );
}
// ----------- Whitespace ------------
{
const char* xml = "<element>"
"<a> This \nis &apos; text &apos; </a>"
"<b> This is &apos; text &apos; \n</b>"
"<c>This is &apos; \n\n text &apos;</c>"
"</element>";
XMLDocument doc( true, COLLAPSE_WHITESPACE );
doc.Parse( xml );
const XMLElement* element = doc.FirstChildElement();
for( const XMLElement* parent = element->FirstChildElement();
parent;
parent = parent->NextSiblingElement() )
{
XMLTest( "Whitespace collapse", "This is ' text '", parent->GetText() );
}
}
// ----------- Performance tracking --------------
{