Added performance test and option to leave entities

This commit is contained in:
Lee Thomason 2012-03-02 12:59:39 -08:00
parent 2705731775
commit 6f381b7739
6 changed files with 133 additions and 31 deletions

View File

@ -55,7 +55,7 @@ complete XML needs, TinyXML-2 is not the parser for you.
Which should you use? TinyXML-2 uses a similar API to TinyXML-1 and the same Which should you use? TinyXML-2 uses a similar API to TinyXML-1 and the same
rich test cases. But the implementation of the parser is completely re-written rich test cases. But the implementation of the parser is completely re-written
to make it more appropriate for use in a game. It uses less memory, is faster, to make it more appropriate for use in a game. It uses less memory, is faster,
and user far few memory allocations. and uses far few memory allocations.
TinyXML-2 has no requirement for STL, but has also dropped all STL support. All TinyXML-2 has no requirement for STL, but has also dropped all STL support. All
strings are query and set as 'const char*'. This allows the use of internal strings are query and set as 'const char*'. This allows the use of internal

View File

@ -740,7 +740,7 @@ char* XMLText::ParseDeep( char* p, StrPair* )
return p; return p;
} }
else { else {
p = value.ParseText( p, "<", StrPair::TEXT_ELEMENT ); p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES );
if ( !p ) { if ( !p ) {
document->SetError( ERROR_PARSING_TEXT, start, 0 ); document->SetError( ERROR_PARSING_TEXT, start, 0 );
} }
@ -916,14 +916,14 @@ bool XMLUnknown::Accept( XMLVisitor* visitor ) const
} }
// --------- XMLAttribute ---------- // // --------- XMLAttribute ---------- //
char* XMLAttribute::ParseDeep( char* p ) char* XMLAttribute::ParseDeep( char* p, bool processEntities )
{ {
p = name.ParseText( p, "=", StrPair::ATTRIBUTE_NAME ); p = name.ParseText( p, "=", StrPair::ATTRIBUTE_NAME );
if ( !p || !*p ) return 0; if ( !p || !*p ) return 0;
char endTag[2] = { *p, 0 }; char endTag[2] = { *p, 0 };
++p; ++p;
p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE ); p = value.ParseText( p, endTag, processEntities ? StrPair::ATTRIBUTE_VALUE : StrPair::ATTRIBUTE_VALUE_LEAVE_ENTITIES );
//if ( value.Empty() ) return 0; //if ( value.Empty() ) return 0;
return p; return p;
} }
@ -1141,7 +1141,7 @@ char* XMLElement::ParseAttributes( char* p )
XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute(); XMLAttribute* attrib = new (document->attributePool.Alloc() ) XMLAttribute();
attrib->memPool = &document->attributePool; attrib->memPool = &document->attributePool;
p = attrib->ParseDeep( p ); p = attrib->ParseDeep( p, document->ProcessEntities() );
if ( !p || Attribute( attrib->Name() ) ) { if ( !p || Attribute( attrib->Name() ) ) {
DELETE_ATTRIBUTE( attrib ); DELETE_ATTRIBUTE( attrib );
document->SetError( ERROR_PARSING_ATTRIBUTE, start, p ); document->SetError( ERROR_PARSING_ATTRIBUTE, start, p );
@ -1250,9 +1250,13 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const
// --------- XMLDocument ----------- // // --------- XMLDocument ----------- //
XMLDocument::XMLDocument() : XMLDocument::XMLDocument( bool _processEntities ) :
XMLNode( 0 ), XMLNode( 0 ),
writeBOM( false ), writeBOM( false ),
processEntities( _processEntities ),
errorID( 0 ),
errorStr1( 0 ),
errorStr2( 0 ),
charBuffer( 0 ) charBuffer( 0 )
{ {
document = this; // avoid warning about 'this' in initializer list document = this; // avoid warning about 'this' in initializer list
@ -1474,7 +1478,8 @@ XMLPrinter::XMLPrinter( FILE* file ) :
firstElement( true ), firstElement( true ),
fp( file ), fp( file ),
depth( 0 ), depth( 0 ),
textDepth( -1 ) textDepth( -1 ),
processEntities( true )
{ {
for( int i=0; i<ENTITY_RANGE; ++i ) { for( int i=0; i<ENTITY_RANGE; ++i ) {
entityFlag[i] = false; entityFlag[i] = false;
@ -1540,31 +1545,33 @@ void XMLPrinter::PrintString( const char* p, bool restricted )
const char* q = p; const char* q = p;
const bool* flag = restricted ? restrictedEntityFlag : entityFlag; const bool* flag = restricted ? restrictedEntityFlag : entityFlag;
while ( *q ) { if ( processEntities ) {
// Remember, char is sometimes signed. (How many times has that bitten me?) while ( *q ) {
if ( *q > 0 && *q < ENTITY_RANGE ) { // Remember, char is sometimes signed. (How many times has that bitten me?)
// Check for entities. If one is found, flush if ( *q > 0 && *q < ENTITY_RANGE ) {
// the stream up until the entity, write the // Check for entities. If one is found, flush
// entity, and keep looking. // the stream up until the entity, write the
if ( flag[*q] ) { // entity, and keep looking.
while ( p < q ) { if ( flag[*q] ) {
Print( "%c", *p ); while ( p < q ) {
Print( "%c", *p );
++p;
}
for( int i=0; i<NUM_ENTITIES; ++i ) {
if ( entities[i].value == *q ) {
Print( "&%s;", entities[i].pattern );
break;
}
}
++p; ++p;
} }
for( int i=0; i<NUM_ENTITIES; ++i ) {
if ( entities[i].value == *q ) {
Print( "&%s;", entities[i].pattern );
break;
}
}
++p;
} }
++q;
} }
++q;
} }
// Flush the remaining string. This will be the entire // Flush the remaining string. This will be the entire
// string if an entity wasn't found. // string if an entity wasn't found.
if ( q-p > 0 ) { if ( !processEntities || (q-p > 0) ) {
Print( "%s", p ); Print( "%s", p );
} }
} }
@ -1735,6 +1742,7 @@ void XMLPrinter::PushUnknown( const char* value )
bool XMLPrinter::VisitEnter( const XMLDocument& doc ) bool XMLPrinter::VisitEnter( const XMLDocument& doc )
{ {
processEntities = doc.ProcessEntities();
if ( doc.HasBOM() ) { if ( doc.HasBOM() ) {
PushHeader( true, false ); PushHeader( true, false );
} }
@ -1785,5 +1793,3 @@ bool XMLPrinter::Visit( const XMLUnknown& unknown )
PushUnknown( unknown.Value() ); PushUnknown( unknown.Value() );
return true; return true;
} }

View File

@ -115,8 +115,10 @@ public:
NEEDS_NEWLINE_NORMALIZATION = 0x02, NEEDS_NEWLINE_NORMALIZATION = 0x02,
TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
ATTRIBUTE_NAME = 0, ATTRIBUTE_NAME = 0,
ATTRIBUTE_VALUE = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, ATTRIBUTE_VALUE = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
ATTRIBUTE_VALUE_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
COMMENT = NEEDS_NEWLINE_NORMALIZATION, COMMENT = NEEDS_NEWLINE_NORMALIZATION,
}; };
@ -804,7 +806,7 @@ private:
void operator=( const XMLAttribute& ); // not supported void operator=( const XMLAttribute& ); // not supported
void SetName( const char* name ); void SetName( const char* name );
char* ParseDeep( char* p ); char* ParseDeep( char* p, bool processEntities );
mutable StrPair name; mutable StrPair name;
mutable StrPair value; mutable StrPair value;
@ -962,7 +964,7 @@ class XMLDocument : public XMLNode
friend class XMLElement; friend class XMLElement;
public: public:
/// constructor /// constructor
XMLDocument(); XMLDocument( bool processEntities = true );
~XMLDocument(); ~XMLDocument();
virtual XMLDocument* ToDocument() { return this; } virtual XMLDocument* ToDocument() { return this; }
@ -993,6 +995,11 @@ public:
*/ */
void SaveFile( const char* filename ); void SaveFile( const char* filename );
bool ProcessEntities() const { return processEntities; }
/**
Returns true if this document has a leading Byte Order Mark of UTF8.
*/
bool HasBOM() const { return writeBOM; } bool HasBOM() const { return writeBOM; }
/** Return the root element of DOM. Equivalent to FirstChildElement(). /** Return the root element of DOM. Equivalent to FirstChildElement().
@ -1071,8 +1078,8 @@ public:
// internal // internal
char* Identify( char* p, XMLNode** node ); char* Identify( char* p, XMLNode** node );
virtual XMLNode* ShallowClone( XMLDocument* document ) const { return 0; } virtual XMLNode* ShallowClone( XMLDocument* /*document*/ ) const { return 0; }
virtual bool ShallowEqual( const XMLNode* compare ) const { return false; } virtual bool ShallowEqual( const XMLNode* /*compare*/ ) const { return false; }
private: private:
XMLDocument( const XMLDocument& ); // not supported XMLDocument( const XMLDocument& ); // not supported
@ -1080,6 +1087,7 @@ private:
void InitDocument(); void InitDocument();
bool writeBOM; bool writeBOM;
bool processEntities;
int errorID; int errorID;
const char* errorStr1; const char* errorStr1;
const char* errorStr2; const char* errorStr2;
@ -1196,6 +1204,7 @@ private:
FILE* fp; FILE* fp;
int depth; int depth;
int textDepth; int textDepth;
bool processEntities;
enum { enum {
ENTITY_RANGE = 64, ENTITY_RANGE = 64,

View File

@ -80,6 +80,9 @@
<ItemGroup> <ItemGroup>
<ClInclude Include="..\tinyxml2.h" /> <ClInclude Include="..\tinyxml2.h" />
</ItemGroup> </ItemGroup>
<ItemGroup>
<None Include="..\readme.txt" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">
</ImportGroup> </ImportGroup>

View File

@ -19,4 +19,9 @@
<Filter>Source Files</Filter> <Filter>Source Files</Filter>
</ClInclude> </ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup>
<None Include="..\readme.txt">
<Filter>Source Files</Filter>
</None>
</ItemGroup>
</Project> </Project>

View File

@ -3,9 +3,12 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <time.h>
#if defined( _MSC_VER ) #if defined( _MSC_VER )
#include <crtdbg.h> #include <crtdbg.h>
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
_CrtMemState startMemState; _CrtMemState startMemState;
_CrtMemState endMemState; _CrtMemState endMemState;
#endif #endif
@ -211,6 +214,8 @@ int main( int /*argc*/, const char* /*argv*/ )
//gNewTotal = gNew - newStart; //gNewTotal = gNew - newStart;
} }
{ {
const char* error = "<?xml version=\"1.0\" standalone=\"no\" ?>\n" const char* error = "<?xml version=\"1.0\" standalone=\"no\" ?>\n"
"<passages count=\"006\" formatversion=\"20020620\">\n" "<passages count=\"006\" formatversion=\"20020620\">\n"
@ -458,6 +463,24 @@ int main( int /*argc*/, const char* /*argv*/ )
fclose( textfile ); fclose( textfile );
} }
{
	// Entity suppression: a document constructed with processEntities == false
	// must hand back attribute values and text exactly as written in the
	// source, including known entities (&quot;, &apos;) and unknown ones (&ttk;).
	static const char* const expectedContext =
		"Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;.";
	static const char* const expectedText = "Crazy &ttk;";

	const char* passages =
		"<?xml version=\"1.0\" standalone=\"no\" ?>"
		"<passages count=\"006\" formatversion=\"20020620\">"
		"<psg context=\"Line 5 has &quot;quotation marks&quot; and &apos;apostrophe marks&apos;.\">Crazy &ttk;</psg>"
		"</passages>";

	XMLDocument doc( false );
	doc.Parse( passages );

	// <passages> is the root element; <psg> is its first child element.
	XMLElement* psg = doc.FirstChildElement()->FirstChildElement();

	XMLTest( "No entity parsing.", psg->Attribute( "context" ), expectedContext );
	XMLTest( "No entity parsing.", psg->FirstChild()->Value(), expectedText );

	// Printing goes through the same document, so the output should also
	// leave the entities untouched.
	doc.Print();
}
{ {
const char* test = "<?xml version='1.0'?><a.elem xmi.version='2.0'/>"; const char* test = "<?xml version='1.0'?><a.elem xmi.version='2.0'/>";
@ -653,6 +676,62 @@ int main( int /*argc*/, const char* /*argv*/ )
XMLTest( "Clone and Equal", 4, count ); XMLTest( "Clone and Equal", 4, count );
} }
// ----------- Performance tracking --------------
{
	// Performance check: load dream.xml into memory once, parse it COUNT
	// times, and print the mean time per parse in milliseconds.
	// If the file cannot be opened or measured, the timing is skipped with a
	// message instead of dereferencing a null FILE*.
	static const int COUNT = 10;
	static const char* note =
#ifdef DEBUG
		"DEBUG";
#else
		"Release";
#endif

#if defined(_MSC_VER)
#pragma warning ( push )
#pragma warning ( disable : 4996 )		// Fail to see a compelling reason why this should be deprecated.
#endif
	FILE* fp = fopen( "dream.xml", "r" );
#if defined(_MSC_VER)
#pragma warning ( pop )
#endif

	// Determine the file size; -1 marks "could not be determined".
	long size = -1;
	if ( fp ) {
		if ( fseek( fp, 0, SEEK_END ) == 0 ) {
			size = ftell( fp );
		}
		if ( size >= 0 && fseek( fp, 0, SEEK_SET ) != 0 ) {
			size = -1;
		}
	}

	if ( !fp || size < 0 ) {
		if ( fp ) {
			fclose( fp );
		}
		printf( "\nSkipping performance test: unable to read dream.xml\n" );
	}
	else {
		char* mem = new char[size+1];
		// The file is opened in text mode, so the number of bytes actually
		// delivered may be smaller than the size reported by ftell().
		// Terminate the buffer at the real read count so no uninitialised
		// bytes are handed to the parser.
		size_t nRead = fread( mem, 1, (size_t)size, fp );
		fclose( fp );
		mem[nRead] = 0;

#if defined( _MSC_VER )
		__int64 start, end, freq;
		QueryPerformanceFrequency( (LARGE_INTEGER*) &freq );
		QueryPerformanceCounter( (LARGE_INTEGER*) &start );
#else
		clock_t cstart = clock();
#endif
		for( int i=0; i<COUNT; ++i ) {
			XMLDocument doc;
			doc.Parse( mem );
		}
#if defined( _MSC_VER )
		QueryPerformanceCounter( (LARGE_INTEGER*) &end );
#else
		clock_t cend = clock();
#endif

		delete [] mem;

#if defined( _MSC_VER )
		printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, 1000.0 * (double)(end-start) / ( (double)freq * (double)COUNT) );
#else
		// clock() counts in units of 1/CLOCKS_PER_SEC seconds; convert to
		// milliseconds before averaging over the runs.
		printf( "\nParsing %s of dream.xml: %.3f milli-seconds\n", note, 1000.0 * (double)(cend - cstart) / ( (double)CLOCKS_PER_SEC * (double)COUNT ) );
#endif
	}
}
#if defined( _MSC_VER ) #if defined( _MSC_VER )
_CrtMemCheckpoint( &endMemState ); _CrtMemCheckpoint( &endMemState );
//_CrtMemDumpStatistics( &endMemState ); //_CrtMemDumpStatistics( &endMemState );