From bc1bfb7f273373ffae68bdf53a027a56c3e4b3ef Mon Sep 17 00:00:00 2001 From: "Lee Thomason (grinliz)" Date: Mon, 20 Aug 2012 22:00:38 -0700 Subject: [PATCH] added whitespace=collapse support. tests work. code needs review --- tinyxml2.cpp | 49 +++++++++++++++++++++++++++++++++++++++++++------ tinyxml2.h | 35 ++++++++++++++++++++++++++++------- xmltest.cpp | 19 +++++++++++++++++++ 3 files changed, 90 insertions(+), 13 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 8069d04..b20d9f0 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -23,10 +23,12 @@ distribution. #include "tinyxml2.h" -#include -#include -#include -#include +#include // yes, this one new style header, is in the Android SDK. +#ifdef ANDROID_NDK + #include +#else + #include +#endif using namespace tinyxml2; @@ -156,6 +158,31 @@ char* StrPair::ParseName( char* p ) } +void StrPair::CollapseWhitespace() +{ + // Trim leading space. + start = XMLUtil::SkipWhiteSpace( start ); + + if ( start && *start ) { + char* p = start; // the read pointer + char* q = start; // the write pointer + + while( *p ) { + if ( XMLUtil::IsWhiteSpace( *p )) { + p = XMLUtil::SkipWhiteSpace( p ); + if ( *p == 0 ) + break; // don't write to q; this trims the trailing space. + *q = ' '; + ++q; + } + *q = *p; + ++q; + ++p; + } + *q = 0; + } +} + const char* StrPair::GetStr() { @@ -232,6 +259,11 @@ const char* StrPair::GetStr() } *q = 0; } + // The loop below has plenty going on, and this + // is a less useful mode. Break it out. + if ( flags & COLLAPSE_WHITESPACE ) { + CollapseWhitespace(); + } flags = (flags & NEEDS_DELETE); } return start; @@ -815,7 +847,11 @@ char* XMLText::ParseDeep( char* p, StrPair* ) return p; } else { - p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES ); + int flags = document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES; + if ( document->WhitespaceMode() == COLLAPSE_WHITESPACE ) + flags |= StrPair::COLLAPSE_WHITESPACE; + + p = value.ParseText( p, "<", flags ); if ( !p ) { document->SetError( XML_ERROR_PARSING_TEXT, start, 0 ); } @@ -1416,11 +1452,12 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const // --------- XMLDocument ----------- // -XMLDocument::XMLDocument( bool _processEntities ) : +XMLDocument::XMLDocument( bool _processEntities, Whitespace _whitespace ) : XMLNode( 0 ), writeBOM( false ), processEntities( _processEntities ), errorID( 0 ), + whitespace( _whitespace ), errorStr1( 0 ), errorStr2( 0 ), charBuffer( 0 ) diff --git a/tinyxml2.h b/tinyxml2.h index e1c22c5..b766d35 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -24,11 +24,21 @@ distribution. #ifndef TINYXML2_INCLUDED #define TINYXML2_INCLUDED -#include -#include -#include -#include -#include +#ifdef ANDROID_NDK + #include + #include + #include + #include + #include + #include +#else + #include + #include + #include + #include + #include + #include +#endif /* TODO: intern strings instead of allocation. @@ -112,6 +122,7 @@ public: enum { NEEDS_ENTITY_PROCESSING = 0x01, NEEDS_NEWLINE_NORMALIZATION = 0x02, + COLLAPSE_WHITESPACE = 0x04, TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, @@ -140,6 +151,7 @@ public: private: void Reset(); + void CollapseWhitespace(); enum { NEEDS_FLUSH = 0x100, @@ -365,6 +377,7 @@ public: // correct, but simple, and usually works. static const char* SkipWhiteSpace( const char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast(p) ) ) { ++p; } return p; } static char* SkipWhiteSpace( char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast(p) ) ) { ++p; } return p; } + static bool IsWhiteSpace( char p ) { return !IsUTF8Continuation(p) && isspace( static_cast(p) ); } inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX ) { int n = 0; @@ -1031,6 +1044,12 @@ private: }; +enum Whitespace { + PRESERVE_WHITESPACE, + COLLAPSE_WHITESPACE +}; + + /** A Document binds together all the functionality. It can be saved, loaded, and printed to the screen. All Nodes are connected and allocated to a Document. @@ -1041,7 +1060,7 @@ class XMLDocument : public XMLNode friend class XMLElement; public: /// constructor - XMLDocument( bool processEntities = true ); + XMLDocument( bool processEntities = true, Whitespace = PRESERVE_WHITESPACE ); ~XMLDocument(); virtual XMLDocument* ToDocument() { return this; } @@ -1086,7 +1105,8 @@ public: */ int SaveFile( FILE* ); - bool ProcessEntities() const { return processEntities; } + bool ProcessEntities() const { return processEntities; } + Whitespace WhitespaceMode() const { return whitespace; } /** Returns true if this document has a leading Byte Order Mark of UTF8. @@ -1189,6 +1209,7 @@ private: bool writeBOM; bool processEntities; int errorID; + Whitespace whitespace; const char* errorStr1; const char* errorStr2; char* charBuffer; diff --git a/xmltest.cpp b/xmltest.cpp index 81018aa..f48fce8 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -938,6 +938,25 @@ int main( int /*argc*/, const char ** /*argv*/ ) XMLTest( "QueryBoolText", boolValue, true, false ); } + // ----------- Whitespace ------------ + { + const char* xml = "" + " This \nis ' text ' " + " This is ' text ' \n" + "This is ' \n\n text '" + ""; + XMLDocument doc( true, COLLAPSE_WHITESPACE ); + doc.Parse( xml ); + + const XMLElement* element = doc.FirstChildElement(); + for( const XMLElement* parent = element->FirstChildElement(); + parent; + parent = parent->NextSiblingElement() ) + { + XMLTest( "Whitespace collapse", "This is ' text '", parent->GetText() ); + } + } + // ----------- Performance tracking -------------- {