From 12d5a03e33c5c5a98c5946a8bd0c5026f781855d Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Wed, 29 Feb 2012 16:19:03 -0800 Subject: [PATCH 1/3] clean up readme.txt --- readme.txt | 50 +++++++++++++++++--------------------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/readme.txt b/readme.txt index 1291a70..62a1972 100755 --- a/readme.txt +++ b/readme.txt @@ -71,7 +71,7 @@ Both parsers: Advantages of TinyXML-2
  1. The focus of all future dev.
  2. -
  3. Many fewer memory allocation (about 1/100th), uses less memory (about 40% of TinyXML-1), and faster.
  4. +
  5. Many fewer memory allocations (1/10th to 1/100th), uses less memory (about 40% of TinyXML-1), and faster.
  6. No STL requirement.
  7. More modern C++, including a proper namespace.
  8. Proper and useful handling of whitespace
  9. @@ -107,52 +107,41 @@ As a first step, all newlines / carriage-returns / line-feeds are normalized to line-feed character, as required by the XML spec. White space in text is preserved. For example: -@verbatim + Hello, World -@endverbatim The leading space before the "Hello" and the double space after the comma are preserved. Line-feeds are preserved, as in this example: -@verbatim Hello again, World -@endverbatim However, white space between elements is *not* preserved. Although not strictly compliant, tracking and reporting inter-element space is awkward, and not normally valuable. TinyXML-2 sees these as the same XML: -@verbatim - -1 -2 -3 - -@endverbatim + + 1 + 2 + 3 + -@verbatim -123 -@endverbatim + 123

    Entities

    TinyXML-2 recognizes the pre-defined "character entities", meaning special characters. Namely: -@verbatim &amp; & &lt; < &gt; > &quot; " &apos; ' -@endverbatim These are recognized when the XML document is read, and translated to their UTF-8 equivalents. For instance, text with the XML of: -@verbatim Far &amp; Away -@endverbatim will have the Value() of "Far & Away" when queried from the XMLText object, and will be written back to the XML stream/file as an ampersand. @@ -167,42 +156,37 @@ regular code point. The output is correct, but the entity syntax isn't preserved

    Print to file

    You can directly use the convenience function: -@verbatim + XMLDocument doc; ... doc.Save( "foo.xml" ); -@endverbatim Or the XMLPrinter class: -@verbatim + XMLPrinter printer( fp ); doc.Print( &printer ); -@endverbatim

    Print to memory

    Printing to memory is supported by the XMLPrinter. -@verbatim + XMLPrinter printer; doc->Print( &printer ); // printer.CStr() has a const char* to the XML -@endverbatim

    Print without an XMLDocument

    - When loading, an XML parser is very useful. However, sometimes - when saving, it just gets in the way. The code is often set up - for streaming, and constructing the DOM is just overhead. +When loading, an XML parser is very useful. However, sometimes +when saving, it just gets in the way. The code is often set up +for streaming, and constructing the DOM is just overhead. - The Printer supports the streaming case. The following code - prints out a trivially simple XML file without ever creating - an XML document. +The Printer supports the streaming case. The following code +prints out a trivially simple XML file without ever creating +an XML document. -@verbatim XMLPrinter printer( fp ); printer.OpenElement( "foo" ); printer.PushAttribute( "foo", "bar" ); printer.CloseElement(); -@endverbatim

    Using and Installing

    From 2705731775e79446507432219dc06ff1625ae6e2 Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Fri, 2 Mar 2012 09:04:53 -0800 Subject: [PATCH 2/3] add version info --- dox | 2 +- setversion.py | 79 +++++++++++++++++++++++++++++++++++++++++++++++++++ tinyxml2.h | 3 ++ 3 files changed, 83 insertions(+), 1 deletion(-) create mode 100644 setversion.py diff --git a/dox b/dox index ce47ff2..c9268e9 100755 --- a/dox +++ b/dox @@ -32,7 +32,7 @@ PROJECT_NAME = "TinyXML-2" # This could be handy for archiving the generated documentation or # if some version control system is used. -PROJECT_NUMBER = +PROJECT_NUMBER = 0.9.0 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer diff --git a/setversion.py b/setversion.py new file mode 100644 index 0000000..3455900 --- /dev/null +++ b/setversion.py @@ -0,0 +1,79 @@ +# Python program to set the version. +############################################## + +import re +import sys + +def fileProcess( name, lineFunction ): + filestream = open( name, 'r' ) + if filestream.closed: + print( "file " + name + " not open." ) + return + + output = "" + print( "--- Processing " + name + " ---------" ) + while 1: + line = filestream.readline() + if not line: break + output += lineFunction( line ) + filestream.close() + + if not output: return # basic error checking + + print( "Writing file " + name ) + filestream = open( name, "w" ); + filestream.write( output ); + filestream.close() + + +def echoInput( line ): + return line + +major = input( "Major: " ) +minor = input( "Minor: " ) +build = input( "Build: " ) + +print "Setting dox,tinyxml2.h" +print "Version: " + `major` + "." + `minor` + "." 
+ `build` + +#### Write the tinyxml.h #### + +def engineRule( line ): + + matchMajor = "static const int TIXML2_MAJOR_VERSION" + matchMinor = "static const int TIXML2_MINOR_VERSION" + matchBuild = "static const int TIXML2_PATCH_VERSION" + + if line[0:len(matchMajor)] == matchMajor: + print "1)tinyxml2.h Major found" + return matchMajor + " = " + `major` + ";\n" + + elif line[0:len(matchMinor)] == matchMinor: + print "2)tinyxml2.h Minor found" + return matchMinor + " = " + `minor` + ";\n" + + elif line[0:len(matchBuild)] == matchBuild: + print "3)tinyxml2.h Build found" + return matchBuild + " = " + `build` + ";\n" + + else: + return line; + +fileProcess( "tinyxml2.h", engineRule ) + + +#### Write the dox #### + +def doxRule( line ): + + match = "PROJECT_NUMBER" + + if line[0:len( match )] == match: + print "dox project found" + return "PROJECT_NUMBER = " + `major` + "." + `minor` + "." + `build` + "\n" + + else: + return line; + +fileProcess( "dox", doxRule ) + diff --git a/tinyxml2.h b/tinyxml2.h index ac89e36..d1ae0d7 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -84,6 +84,9 @@ distribution. #define TIXML_SSCANF sscanf #endif +static const int TIXML2_MAJOR_VERSION = 0; +static const int TIXML2_MINOR_VERSION = 9; +static const int TIXML2_PATCH_VERSION = 0; namespace tinyxml2 { From 6f381b773923ab75eb9b30c3028e60a218fb9b96 Mon Sep 17 00:00:00 2001 From: Lee Thomason Date: Fri, 2 Mar 2012 12:59:39 -0800 Subject: [PATCH 3/3] Added performance test and option to leave entities --- readme.txt | 2 +- tinyxml2.cpp | 58 +++++++++++++---------- tinyxml2.h | 17 +++++-- tinyxml2/tinyxml2.vcxproj | 3 ++ tinyxml2/tinyxml2.vcxproj.filters | 5 ++ xmltest.cpp | 79 +++++++++++++++++++++++++++++++ 6 files changed, 133 insertions(+), 31 deletions(-) diff --git a/readme.txt b/readme.txt index 62a1972..200805c 100755 --- a/readme.txt +++ b/readme.txt @@ -55,7 +55,7 @@ complete XML needs, TinyXML-2 is not the parser for you. Which should you use? 
TinyXML-2 uses a similar API to TinyXML-1 and the same rich test cases. But the implementation of the parser is completely re-written to make it more appropriate for use in a game. It uses less memory, is faster, -and user far few memory allocations. +and uses far few memory allocations. TinyXML-2 has no requirement for STL, but has also dropped all STL support. All strings are query and set as 'const char*'. This allows the use of internal diff --git a/tinyxml2.cpp b/tinyxml2.cpp index bb491de..c545180 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -740,7 +740,7 @@ char* XMLText::ParseDeep( char* p, StrPair* ) return p; } else { - p = value.ParseText( p, "<", StrPair::TEXT_ELEMENT ); + p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES ); if ( !p ) { document->SetError( ERROR_PARSING_TEXT, start, 0 ); } @@ -916,14 +916,14 @@ bool XMLUnknown::Accept( XMLVisitor* visitor ) const } // --------- XMLAttribute ---------- // -char* XMLAttribute::ParseDeep( char* p ) +char* XMLAttribute::ParseDeep( char* p, bool processEntities ) { p = name.ParseText( p, "=", StrPair::ATTRIBUTE_NAME ); if ( !p || !*p ) return 0; char endTag[2] = { *p, 0 }; ++p; - p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE ); + p = value.ParseText( p, endTag, processEntities ? 
StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES ); //if ( value.Empty() ) return 0; return p; } @@ -1141,7 +1141,7 @@ char* XMLElement::ParseAttributes( char* p ) XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute(); attrib->memPool = &document->attributePool; - p = attrib->ParseDeep( p ); + p = attrib->ParseDeep( p, document->ProcessEntities() ); if ( !p || Attribute( attrib->Name() ) ) { DELETE_ATTRIBUTE( attrib ); document->SetError( ERROR_PARSING_ATTRIBUTE, start, p ); @@ -1250,9 +1250,13 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const // --------- XMLDocument ----------- // -XMLDocument::XMLDocument() : +XMLDocument::XMLDocument( bool _processEntities ) : XMLNode( 0 ), writeBOM( false ), + processEntities( _processEntities ), + errorID( 0 ), + errorStr1( 0 ), + errorStr2( 0 ), charBuffer( 0 ) { document = this; // avoid warning about 'this' in initializer list @@ -1474,7 +1478,8 @@ XMLPrinter::XMLPrinter( FILE* file ) : firstElement( true ), fp( file ), depth( 0 ), - textDepth( -1 ) + textDepth( -1 ), + processEntities( true ) { for( int i=0; i 0 && *q < ENTITY_RANGE ) { - // Check for entities. If one is found, flush - // the stream up until the entity, write the - // entity, and keep looking. - if ( flag[*q] ) { - while ( p < q ) { - Print( "%c", *p ); + if ( processEntities ) { + while ( *q ) { + // Remember, char is sometimes signed. (How many times has that bitten me?) + if ( *q > 0 && *q < ENTITY_RANGE ) { + // Check for entities. If one is found, flush + // the stream up until the entity, write the + // entity, and keep looking. 
+ if ( flag[*q] ) { + while ( p < q ) { + Print( "%c", *p ); + ++p; + } + for( int i=0; i 0 ) { + if ( !processEntities || (q-p > 0) ) { Print( "%s", p ); } } @@ -1735,6 +1742,7 @@ void XMLPrinter::PushUnknown( const char* value ) bool XMLPrinter::VisitEnter( const XMLDocument& doc ) { + processEntities = doc.ProcessEntities(); if ( doc.HasBOM() ) { PushHeader( true, false ); } @@ -1785,5 +1793,3 @@ bool XMLPrinter::Visit( const XMLUnknown& unknown ) PushUnknown( unknown.Value() ); return true; } - - diff --git a/tinyxml2.h b/tinyxml2.h index d1ae0d7..368916c 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -115,8 +115,10 @@ public: NEEDS_NEWLINE_NORMALIZATION = 0x02, TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, + TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, ATTRIBUTE_NAME = 0, ATTRIBUTE_VALUE = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, + ATTRIBUTE_VALUE_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, COMMENT = NEEDS_NEWLINE_NORMALIZATION, }; @@ -804,7 +806,7 @@ private: void operator=( const XMLAttribute& ); // not supported void SetName( const char* name ); - char* ParseDeep( char* p ); + char* ParseDeep( char* p, bool processEntities ); mutable StrPair name; mutable StrPair value; @@ -962,7 +964,7 @@ class XMLDocument : public XMLNode friend class XMLElement; public: /// constructor - XMLDocument(); + XMLDocument( bool processEntities = true ); ~XMLDocument(); virtual XMLDocument* ToDocument() { return this; } @@ -993,6 +995,11 @@ public: */ void SaveFile( const char* filename ); + bool ProcessEntities() const { return processEntities; } + + /** + Returns true if this document has a leading Byte Order Mark of UTF8. + */ bool HasBOM() const { return writeBOM; } /** Return the root element of DOM. Equivalent to FirstChildElement(). 
@@ -1071,8 +1078,8 @@ public: // internal char* Identify( char* p, XMLNode** node ); - virtual XMLNode* ShallowClone( XMLDocument* document ) const { return 0; } - virtual bool ShallowEqual( const XMLNode* compare ) const { return false; } + virtual XMLNode* ShallowClone( XMLDocument* /*document*/ ) const { return 0; } + virtual bool ShallowEqual( const XMLNode* /*compare*/ ) const { return false; } private: XMLDocument( const XMLDocument& ); // not supported @@ -1080,6 +1087,7 @@ private: void InitDocument(); bool writeBOM; + bool processEntities; int errorID; const char* errorStr1; const char* errorStr2; @@ -1196,6 +1204,7 @@ private: FILE* fp; int depth; int textDepth; + bool processEntities; enum { ENTITY_RANGE = 64, diff --git a/tinyxml2/tinyxml2.vcxproj b/tinyxml2/tinyxml2.vcxproj index 89eef04..f9bd173 100644 --- a/tinyxml2/tinyxml2.vcxproj +++ b/tinyxml2/tinyxml2.vcxproj @@ -80,6 +80,9 @@ + + + diff --git a/tinyxml2/tinyxml2.vcxproj.filters b/tinyxml2/tinyxml2.vcxproj.filters index 37eabf3..20cfa20 100644 --- a/tinyxml2/tinyxml2.vcxproj.filters +++ b/tinyxml2/tinyxml2.vcxproj.filters @@ -19,4 +19,9 @@ Source Files + + + Source Files + + \ No newline at end of file diff --git a/xmltest.cpp b/xmltest.cpp index b3ffc2f..532c1be 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -3,9 +3,12 @@ #include #include #include +#include #if defined( _MSC_VER ) #include + #define WIN32_LEAN_AND_MEAN + #include _CrtMemState startMemState; _CrtMemState endMemState; #endif @@ -211,6 +214,8 @@ int main( int /*argc*/, const char* /*argv*/ ) //gNewTotal = gNew - newStart; } + + { const char* error = "\n" "\n" @@ -458,6 +463,24 @@ int main( int /*argc*/, const char* /*argv*/ ) fclose( textfile ); } + { + // Suppress entities. 
+ const char* passages = + "" + "" + "Crazy &ttk;" + ""; + + XMLDocument doc( false ); + doc.Parse( passages ); + + XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->Attribute( "context" ), + "Line 5 has "quotation marks" and 'apostrophe marks'." ); + XMLTest( "No entity parsing.", doc.FirstChildElement()->FirstChildElement()->FirstChild()->Value(), + "Crazy &ttk;" ); + doc.Print(); + } + { const char* test = ""; @@ -653,6 +676,62 @@ int main( int /*argc*/, const char* /*argv*/ ) XMLTest( "Clone and Equal", 4, count ); } + // ----------- Performance tracking -------------- + { +#if defined( _MSC_VER ) + __int64 start, end, freq; + QueryPerformanceFrequency( (LARGE_INTEGER*) &freq ); +#endif + +#if defined(_MSC_VER) +#pragma warning ( push ) +#pragma warning ( disable : 4996 ) // Fail to see a compelling reason why this should be deprecated. +#endif + FILE* fp = fopen( "dream.xml", "r" ); +#if defined(_MSC_VER) +#pragma warning ( pop ) +#endif + fseek( fp, 0, SEEK_END ); + long size = ftell( fp ); + fseek( fp, 0, SEEK_SET ); + + char* mem = new char[size+1]; + fread( mem, size, 1, fp ); + fclose( fp ); + mem[size] = 0; + +#if defined( _MSC_VER ) + QueryPerformanceCounter( (LARGE_INTEGER*) &start ); +#else + clock_t cstart = clock(); +#endif + static const int COUNT = 10; + for( int i=0; i