From 6f381b773923ab75eb9b30c3028e60a218fb9b96 Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Fri, 2 Mar 2012 12:59:39 -0800 Subject: [PATCH] Added performance test and option to leave entities --- readme.txt | 2 +- tinyxml2.cpp | 58 +++++++++++++---------- tinyxml2.h | 17 +++++-- tinyxml2/tinyxml2.vcxproj | 3 ++ tinyxml2/tinyxml2.vcxproj.filters | 5 ++ xmltest.cpp | 79 +++++++++++++++++++++++++++++++ 6 files changed, 133 insertions(+), 31 deletions(-) diff --git a/readme.txt b/readme.txt index 62a1972..200805c 100755 --- a/readme.txt +++ b/readme.txt @@ -55,7 +55,7 @@ complete XML needs, TinyXML-2 is not the parser for you. Which should you use? TinyXML-2 uses a similar API to TinyXML-1 and the same rich test cases. But the implementation of the parser is completely re-written to make it more appropriate for use in a game. It uses less memory, is faster, -and user far few memory allocations. +and uses far few memory allocations. TinyXML-2 has no requirement for STL, but has also dropped all STL support. All strings are query and set as 'const char*'. This allows the use of internal diff --git a/tinyxml2.cpp b/tinyxml2.cpp index bb491de..c545180 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -740,7 +740,7 @@ char* XMLText::ParseDeep( char* p, StrPair* ) return p; } else { - p = value.ParseText( p, "<", StrPair::TEXT_ELEMENT ); + p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES ); if ( !p ) { document->SetError( ERROR_PARSING_TEXT, start, 0 ); } @@ -916,14 +916,14 @@ bool XMLUnknown::Accept( XMLVisitor* visitor ) const } // --------- XMLAttribute ---------- // -char* XMLAttribute::ParseDeep( char* p ) +char* XMLAttribute::ParseDeep( char* p, bool processEntities ) { p = name.ParseText( p, "=", StrPair::ATTRIBUTE_NAME ); if ( !p || !*p ) return 0; char endTag[2] = { *p, 0 }; ++p; - p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE ); + p = value.ParseText( p, endTag, processEntities ? StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES ); //if ( value.Empty() ) return 0; return p; } @@ -1141,7 +1141,7 @@ char* XMLElement::ParseAttributes( char* p ) XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute(); attrib->memPool = &document->attributePool; - p = attrib->ParseDeep( p ); + p = attrib->ParseDeep( p, document->ProcessEntities() ); if ( !p || Attribute( attrib->Name() ) ) { DELETE_ATTRIBUTE( attrib ); document->SetError( ERROR_PARSING_ATTRIBUTE, start, p ); @@ -1250,9 +1250,13 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const // --------- XMLDocument ----------- // -XMLDocument::XMLDocument() : +XMLDocument::XMLDocument( bool _processEntities ) : XMLNode( 0 ), writeBOM( false ), + processEntities( _processEntities ), + errorID( 0 ), + errorStr1( 0 ), + errorStr2( 0 ), charBuffer( 0 ) { document = this; // avoid warning about 'this' in initializer list @@ -1474,7 +1478,8 @@ XMLPrinter::XMLPrinter( FILE* file ) : firstElement( true ), fp( file ), depth( 0 ), - textDepth( -1 ) + textDepth( -1 ), + processEntities( true ) { for( int i=0; i 0 && *q < ENTITY_RANGE ) { - // Check for entities. If one is found, flush - // the stream up until the entity, write the - // entity, and keep looking. - if ( flag[*q] ) { - while ( p < q ) { - Print( "%c", *p ); + if ( processEntities ) { + while ( *q ) { + // Remember, char is sometimes signed. (How many times has that bitten me?) + if ( *q > 0 && *q < ENTITY_RANGE ) { + // Check for entities. If one is found, flush + // the stream up until the entity, write the + // entity, and keep looking. + if ( flag[*q] ) { + while ( p < q ) { + Print( "%c", *p ); + ++p; + } + for( int i=0; i 0 ) { + if ( !processEntities || (q-p > 0) ) { Print( "%s", p ); } } @@ -1735,6 +1742,7 @@ void XMLPrinter::PushUnknown( const char* value ) bool XMLPrinter::VisitEnter( const XMLDocument& doc ) { + processEntities = doc.ProcessEntities(); if ( doc.HasBOM() ) { PushHeader( true, false ); } @@ -1785,5 +1793,3 @@ bool XMLPrinter::Visit( const XMLUnknown& unknown ) PushUnknown( unknown.Value() ); return true; } - - diff --git a/tinyxml2.h b/tinyxml2.h index d1ae0d7..368916c 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -115,8 +115,10 @@ public: NEEDS_NEWLINE_NORMALIZATION = 0x02, TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, + TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, ATTRIBUTE_NAME = 0, ATTRIBUTE_VALUE = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, + ATTRIBUTE_VALUE_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, COMMENT = NEEDS_NEWLINE_NORMALIZATION, }; @@ -804,7 +806,7 @@ private: void operator=( const XMLAttribute& ); // not supported void SetName( const char* name ); - char* ParseDeep( char* p ); + char* ParseDeep( char* p, bool processEntities ); mutable StrPair name; mutable StrPair value; @@ -962,7 +964,7 @@ class XMLDocument : public XMLNode friend class XMLElement; public: /// constructor - XMLDocument(); + XMLDocument( bool processEntities = true ); ~XMLDocument(); virtual XMLDocument* ToDocument() { return this; } @@ -993,6 +995,11 @@ public: */ void SaveFile( const char* filename ); + bool ProcessEntities() const { return processEntities; } + + /** + Returns true if this document has a leading Byte Order Mark of UTF8. + */ bool HasBOM() const { return writeBOM; } /** Return the root element of DOM. Equivalent to FirstChildElement(). @@ -1071,8 +1078,8 @@ public: // internal char* Identify( char* p, XMLNode** node ); - virtual XMLNode* ShallowClone( XMLDocument* document ) const { return 0; } - virtual bool ShallowEqual( const XMLNode* compare ) const { return false; } + virtual XMLNode* ShallowClone( XMLDocument* /*document*/ ) const { return 0; } + virtual bool ShallowEqual( const XMLNode* /*compare*/ ) const { return false; } private: XMLDocument( const XMLDocument& ); // not supported @@ -1080,6 +1087,7 @@ private: void InitDocument(); bool writeBOM; + bool processEntities; int errorID; const char* errorStr1; const char* errorStr2; @@ -1196,6 +1204,7 @@ private: FILE* fp; int depth; int textDepth; + bool processEntities; enum { ENTITY_RANGE = 64, diff --git a/tinyxml2/tinyxml2.vcxproj b/tinyxml2/tinyxml2.vcxproj index 89eef04..f9bd173 100644 --- a/tinyxml2/tinyxml2.vcxproj +++ b/tinyxml2/tinyxml2.vcxproj @@ -80,6 +80,9 @@ + + + diff --git a/tinyxml2/tinyxml2.vcxproj.filters b/tinyxml2/tinyxml2.vcxproj.filters index 37eabf3..20cfa20 100644 --- a/tinyxml2/tinyxml2.vcxproj.filters +++ b/tinyxml2/tinyxml2.vcxproj.filters @@ -19,4 +19,9 @@ Source Files + + + Source Files + + \ No newline at end of file diff --git a/xmltest.cpp b/xmltest.cpp index b3ffc2f..532c1be 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -3,9 +3,12 @@ #include #include #include +#include #if defined( _MSC_VER ) #include + #define WIN32_LEAN_AND_MEAN + #include _CrtMemState startMemState; _CrtMemState endMemState; #endif @@ -211,6 +214,8 @@ int main( int /*argc*/, const char* /*argv*/ ) //gNewTotal = gNew - newStart; } + + { const char* error = "\n" "\n" @@ -458,6 +463,24 @@ int main( int /*argc*/, const char* /*argv*/ ) fclose( textfile ); } + { + // Suppress entities. + const char* passages = + "" + "" + "Crazy &ttk;" + ""; + + XMLDocument doc( false ); + doc.Parse( passages ); + + XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->Attribute( "context" ), + "Line 5 has "quotation marks" and 'apostrophe marks'." ); + XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->FirstChild()->Value(), + "Crazy &ttk;" ); + doc.Print(); + } + { const char* test = ""; @@ -653,6 +676,62 @@ int main( int /*argc*/, const char* /*argv*/ ) XMLTest( "Clone and Equal", 4, count ); } + // ----------- Performance tracking -------------- + { +#if defined( _MSC_VER ) + __int64 start, end, freq; + QueryPerformanceFrequency( (LARGE_INTEGER*) &freq ); +#endif + +#if defined(_MSC_VER) +#pragma warning ( push ) +#pragma warning ( disable : 4996 ) // Fail to see a compelling reason why this should be deprecated. +#endif + FILE* fp = fopen( "dream.xml", "r" ); +#if defined(_MSC_VER) +#pragma warning ( pop ) +#endif + fseek( fp, 0, SEEK_END ); + long size = ftell( fp ); + fseek( fp, 0, SEEK_SET ); + + char* mem = new char[size+1]; + fread( mem, size, 1, fp ); + fclose( fp ); + mem[size] = 0; + +#if defined( _MSC_VER ) + QueryPerformanceCounter( (LARGE_INTEGER*) &start ); +#else + clock_t cstart = clock(); +#endif + static const int COUNT = 10; + for( int i=0; i