From 46a14cfec746e22b9dd540cad2e06f0e82ab054d Mon Sep 17 00:00:00 2001 From: "Lee Thomason (grinliz)" Date: Thu, 23 Feb 2012 22:27:28 -0800 Subject: [PATCH] new element loop --- tinyxml2.cpp | 344 +++++++++++++++++++-------------- tinyxml2.h | 12 +- xmltest.cpp | 534 ++++++++++++++++++++++++++------------------------- 3 files changed, 481 insertions(+), 409 deletions(-) diff --git a/tinyxml2.cpp b/tinyxml2.cpp index 1d68cb0..51cf795 100644 --- a/tinyxml2.cpp +++ b/tinyxml2.cpp @@ -18,17 +18,29 @@ static const char CR = CARRIAGE_RETURN; static const char SINGLE_QUOTE = '\''; static const char DOUBLE_QUOTE = '\"'; -// Bunch of unicode info at: -// http://www.unicode.org/faq/utf_bom.html -// ef bb bf (Microsoft "lead bytes") - designates UTF-8 - -static const unsigned char TIXML_UTF_LEAD_0 = 0xefU; -static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; -static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; +// Bunch of unicode info at: +// http://www.unicode.org/faq/utf_bom.html +// ef bb bf (Microsoft "lead bytes") - designates UTF-8 + +static const unsigned char TIXML_UTF_LEAD_0 = 0xefU; +static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU; +static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU; -#define DELETE_NODE( node ) { MemPool* pool = node->memPool; node->~XMLNode(); pool->Free( node ); } -#define DELETE_ATTRIBUTE( attrib ) { MemPool* pool = attrib->memPool; attrib->~XMLAttribute(); pool->Free( attrib ); } +#define DELETE_NODE( node ) { \ + if ( node ) { \ + MemPool* pool = node->memPool; \ + node->~XMLNode(); \ + pool->Free( node ); \ + } \ +} +#define DELETE_ATTRIBUTE( attrib ) { \ + if ( attrib ) { \ + MemPool* pool = attrib->memPool; \ + attrib->~XMLAttribute(); \ + pool->Free( attrib ); \ + } \ +} struct Entity { const char* pattern; @@ -229,114 +241,114 @@ const char* XMLUtil::ReadBOM( const char* p, bool* bom ) } -void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) -{ - const unsigned long BYTE_MASK = 0xBF; - const unsigned long BYTE_MARK = 0x80; - const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; - - if (input < 0x80) - *length = 1; - else if ( input < 0x800 ) - *length = 2; - else if ( input < 0x10000 ) - *length = 3; - else if ( input < 0x200000 ) - *length = 4; - else - { *length = 0; return; } // This code won't covert this correctly anyway. - - output += *length; - - // Scary scary fall throughs. - switch (*length) - { - case 4: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 3: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 2: - --output; - *output = (char)((input | BYTE_MARK) & BYTE_MASK); - input >>= 6; - case 1: - --output; - *output = (char)(input | FIRST_BYTE_MARK[*length]); - } -} - - -const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length ) -{ - // Presume an entity, and pull it out. - *length = 0; - - if ( *(p+1) == '#' && *(p+2) ) - { - unsigned long ucs = 0; - ptrdiff_t delta = 0; - unsigned mult = 1; - - if ( *(p+2) == 'x' ) - { - // Hexadecimal. - if ( !*(p+3) ) return 0; - - const char* q = p+3; - q = strchr( q, ';' ); - - if ( !q || !*q ) return 0; - - delta = q-p; - --q; - - while ( *q != 'x' ) - { - if ( *q >= '0' && *q <= '9' ) - ucs += mult * (*q - '0'); - else if ( *q >= 'a' && *q <= 'f' ) - ucs += mult * (*q - 'a' + 10); - else if ( *q >= 'A' && *q <= 'F' ) - ucs += mult * (*q - 'A' + 10 ); - else - return 0; - mult *= 16; - --q; - } - } - else - { - // Decimal. - if ( !*(p+2) ) return 0; - - const char* q = p+2; - q = strchr( q, ';' ); - - if ( !q || !*q ) return 0; - - delta = q-p; - --q; - - while ( *q != '#' ) - { - if ( *q >= '0' && *q <= '9' ) - ucs += mult * (*q - '0'); - else - return 0; - mult *= 10; - --q; - } - } - // convert the UCS to UTF-8 - ConvertUTF32ToUTF8( ucs, value, length ); - return p + delta + 1; - } - return p+1; -} +void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length ) +{ + const unsigned long BYTE_MASK = 0xBF; + const unsigned long BYTE_MARK = 0x80; + const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC }; + + if (input < 0x80) + *length = 1; + else if ( input < 0x800 ) + *length = 2; + else if ( input < 0x10000 ) + *length = 3; + else if ( input < 0x200000 ) + *length = 4; + else + { *length = 0; return; } // This code won't covert this correctly anyway. + + output += *length; + + // Scary scary fall throughs. + switch (*length) + { + case 4: + --output; + *output = (char)((input | BYTE_MARK) & BYTE_MASK); + input >>= 6; + case 3: + --output; + *output = (char)((input | BYTE_MARK) & BYTE_MASK); + input >>= 6; + case 2: + --output; + *output = (char)((input | BYTE_MARK) & BYTE_MASK); + input >>= 6; + case 1: + --output; + *output = (char)(input | FIRST_BYTE_MARK[*length]); + } +} + + +const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length ) +{ + // Presume an entity, and pull it out. + *length = 0; + + if ( *(p+1) == '#' && *(p+2) ) + { + unsigned long ucs = 0; + ptrdiff_t delta = 0; + unsigned mult = 1; + + if ( *(p+2) == 'x' ) + { + // Hexadecimal. + if ( !*(p+3) ) return 0; + + const char* q = p+3; + q = strchr( q, ';' ); + + if ( !q || !*q ) return 0; + + delta = q-p; + --q; + + while ( *q != 'x' ) + { + if ( *q >= '0' && *q <= '9' ) + ucs += mult * (*q - '0'); + else if ( *q >= 'a' && *q <= 'f' ) + ucs += mult * (*q - 'a' + 10); + else if ( *q >= 'A' && *q <= 'F' ) + ucs += mult * (*q - 'A' + 10 ); + else + return 0; + mult *= 16; + --q; + } + } + else + { + // Decimal. + if ( !*(p+2) ) return 0; + + const char* q = p+2; + q = strchr( q, ';' ); + + if ( !q || !*q ) return 0; + + delta = q-p; + --q; + + while ( *q != '#' ) + { + if ( *q >= '0' && *q <= '9' ) + ucs += mult * (*q - '0'); + else + return 0; + mult *= 10; + --q; + } + } + // convert the UCS to UTF-8 + ConvertUTF32ToUTF8( ucs, value, length ); + return p + delta + 1; + } + return p+1; +} char* XMLDocument::Identify( char* p, XMLNode** node ) @@ -397,6 +409,11 @@ char* XMLDocument::Identify( char* p, XMLNode** node ) returnNode = new (elementPool.Alloc()) XMLElement( this ); returnNode->memPool = &elementPool; p += elementHeaderLen; + + p = XMLUtil::SkipWhiteSpace( p ); + if ( p && *p == '/' ) { + ((XMLElement*)returnNode)->closingType = XMLElement::CLOSING; + } } else { returnNode = new (textPool.Alloc()) XMLText( this ); @@ -587,20 +604,75 @@ const XMLElement* XMLNode::LastChildElement( const char* value ) const char* XMLNode::ParseDeep( char* p ) { + // This is a recursive method, but thinking about it "at the current level" + // it is a pretty simple flat list: + // + // + // + // With a special case: + // + // + // + // + // Where the closing element (/foo) *must* be the next thing after the opening + // element, and the names must match. BUT the tricky bit is that the closing + // element will be read by the child. + while( p && *p ) { XMLNode* node = 0; - p = document->Identify( p, &node ); - if ( p && node ) { - p = node->ParseDeep( p ); + char* mark = p; - if ( node->IsClosingElement() ) { - if ( !XMLUtil::StringEqual( Value(), node->Value() )) { - document->SetError( ERROR_MISMATCHED_ELEMENT, Value(), 0 ); - } + p = document->Identify( p, &node ); + if ( p == 0 ) { + break; + } + + // We read the end tag. Back up and return. + if ( node && node->ToElement() && node->ToElement()->ClosingType() == XMLElement::CLOSING ) { + DELETE_NODE( node ); + return mark; + } + + if ( node ) { + p = node->ParseDeep( p ); + if ( !p ) { DELETE_NODE( node ); - return p; + node = 0; + break; + } + + XMLElement* ele = node->ToElement(); + if ( ele && ele->ClosingType() == XMLElement::OPEN ) { + XMLNode* closingNode = 0; + p = document->Identify( p, &closingNode ); + XMLElement* closingEle = closingNode ? closingNode->ToElement() : 0; + + if ( closingEle == 0 ) { + document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 ); + p = 0; + } + else if ( closingEle->ClosingType() != XMLElement::CLOSING ) { + document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 ); + p = 0; + } + else + { + p = closingEle->ParseDeep( p ); + if ( !XMLUtil::StringEqual( closingEle->Value(), node->Value() )) { + document->SetError( ERROR_MISMATCHED_ELEMENT, node->Value(), 0 ); + p = 0; + } + } + // Else everything is fine, but we need to throw away the node. + DELETE_NODE( closingNode ); + if ( p == 0 ) { + DELETE_NODE( node ); + node = 0; + } + } + if ( node ) { + this->InsertEndChild( node ); } - this->InsertEndChild( node ); } } return 0; @@ -736,7 +808,7 @@ char* XMLAttribute::ParseDeep( char* p ) char endTag[2] = { *p, 0 }; ++p; p = value.ParseText( p, endTag, StrPair::ATTRIBUTE_VALUE ); - if ( value.Empty() ) return 0; + //if ( value.Empty() ) return 0; return p; } @@ -842,9 +914,8 @@ void XMLAttribute::SetAttribute( float v ) // --------- XMLElement ---------- // XMLElement::XMLElement( XMLDocument* doc ) : XMLNode( doc ), - closing( false ), + closingType( 0 ), rootAttribute( 0 ) - //lastAttribute( 0 ) { } @@ -937,10 +1008,9 @@ void XMLElement::DeleteAttribute( const char* name ) } -char* XMLElement::ParseAttributes( char* p, bool* closedElement ) +char* XMLElement::ParseAttributes( char* p ) { const char* start = p; - *closedElement = false; // Read the attributes. while( p ) { @@ -965,11 +1035,7 @@ char* XMLElement::ParseAttributes( char* p, bool* closedElement ) } // end of the tag else if ( *p == '/' && *(p+1) == '>' ) { - if ( closing ) { - document->SetError( ERROR_PARSING_ELEMENT, start, p ); - return 0; - } - *closedElement = true; + closingType = CLOSED; return p+2; // done; sealed element. } // end of the tag @@ -1001,7 +1067,7 @@ char* XMLElement::ParseDeep( char* p ) // parsed just like a regular element then deleted from // the DOM. if ( *p == '/' ) { - closing = true; + closingType = CLOSING; ++p; } @@ -1009,8 +1075,8 @@ char* XMLElement::ParseDeep( char* p ) if ( value.Empty() ) return 0; bool elementClosed=false; - p = ParseAttributes( p, &elementClosed ); - if ( !p || !*p || elementClosed || closing ) + p = ParseAttributes( p ); + if ( !p || !*p || closingType ) return p; p = XMLNode::ParseDeep( p ); diff --git a/tinyxml2.h b/tinyxml2.h index 7185472..0e15231 100644 --- a/tinyxml2.h +++ b/tinyxml2.h @@ -463,7 +463,6 @@ public: virtual bool Accept( XMLVisitor* visitor ) const = 0; virtual char* ParseDeep( char* ); - virtual bool IsClosingElement() const { return false; } protected: XMLNode( XMLDocument* ); @@ -681,7 +680,12 @@ public: const char* GetText() const; // internal: - virtual bool IsClosingElement() const { return closing; } + enum { + OPEN, // + CLOSED, // + CLOSING // + }; + int ClosingType() const { return closingType; } char* ParseDeep( char* p ); private: @@ -693,9 +697,9 @@ private: XMLAttribute* FindAttribute( const char* name ); XMLAttribute* FindOrCreateAttribute( const char* name ); void LinkAttribute( XMLAttribute* attrib ); - char* ParseAttributes( char* p, bool *closedElement ); + char* ParseAttributes( char* p ); - bool closing; + int closingType; XMLAttribute* rootAttribute; }; diff --git a/xmltest.cpp b/xmltest.cpp index d244efa..40ac1a9 100644 --- a/xmltest.cpp +++ b/xmltest.cpp @@ -129,7 +129,7 @@ int main( int argc, const char* argv ) printf( "----------------------------------------------\n" ); } } - +#if 1 { static const char* test = ""; - XMLDocument doc; - doc.Parse( doctype ); - - XMLComment* comment = doc.FirstChild()->ToComment(); - - XMLTest( "Comment formatting.", " Somewhat ", comment->Value() ); - } - { - // Double attributes - const char* doctype = ""; - - XMLDocument doc; - doc.Parse( doctype ); - - XMLTest( "Parsing repeated attributes.", ERROR_PARSING_ATTRIBUTE, doc.ErrorID() ); // is an error to tinyxml (didn't use to be, but caused issues) - } - - { - // Embedded null in stream. - const char* doctype = ""; - - XMLDocument doc; - doc.Parse( doctype ); - XMLTest( "Embedded null throws error.", true, doc.Error() ); - } - - { - // Empty documents should return TIXML_ERROR_PARSING_EMPTY, bug 1070717 - const char* str = " "; - XMLDocument doc; - doc.Parse( str ); - XMLTest( "Empty document error", ERROR_EMPTY_DOCUMENT, doc.ErrorID() ); - } + // Entities not being written correctly. + // From Lynn Allen - { - // Low entities - XMLDocument doc; - doc.Parse( "" ); - const char result[] = { 0x0e, 0 }; - XMLTest( "Low entities.", doc.FirstChildElement()->GetText(), result ); - doc.Print(); - } + const char* passages = + "" + "" + " " + ""; - { - // Attribute values with trailing quotes not handled correctly - XMLDocument doc; - doc.Parse( "" ); - XMLTest( "Throw error with bad end quotes.", doc.Error(), true ); - } + XMLDocument doc; + doc.Parse( passages ); + XMLElement* psg = doc.RootElement()->FirstChildElement(); + const char* context = psg->Attribute( "context" ); + const char* expected = "Line 5 has \"quotation marks\" and 'apostrophe marks'. It also has <, >, and &, as well as a fake copyright \xC2\xA9."; - { - // [ 1663758 ] Failure to report error on bad XML - XMLDocument xml; - xml.Parse(""); - XMLTest("Missing end tag at end of input", xml.Error(), true); - xml.Parse(" "); - XMLTest("Missing end tag with trailing whitespace", xml.Error(), true); - xml.Parse(""); - XMLTest("Mismatched tags", xml.ErrorID(), ERROR_MISMATCHED_ELEMENT); - } - - - { - // [ 1475201 ] TinyXML parses entities in comments - XMLDocument xml; - xml.Parse("" - "" ); - - XMLNode* e0 = xml.FirstChild(); - XMLNode* e1 = e0->NextSibling(); - XMLComment* c0 = e0->ToComment(); - XMLComment* c1 = e1->ToComment(); - - XMLTest( "Comments ignore entities.", " declarations for & ", c0->Value(), true ); - XMLTest( "Comments ignore entities.", " far & away ", c1->Value(), true ); - } - - { - XMLDocument xml; - xml.Parse( "" - "" - "" - "" - "" ); - int count = 0; - - for( XMLNode* ele = xml.FirstChildElement( "Parent" )->FirstChild(); - ele; - ele = ele->NextSibling() ) - { - ++count; - } - - XMLTest( "Comments iterate correctly.", 3, count ); - } - - { - // trying to repro ]1874301]. If it doesn't go into an infinite loop, all is well. - unsigned char buf[] = " " ); - XMLTest( "Handle end tag whitespace", false, xml.Error() ); - } - - { - // This one must not result in an infinite loop - XMLDocument xml; - xml.Parse( "loop" ); - XMLTest( "Infinite loop test.", true, true ); - } + XMLTest( "Entity transformation: read. ", expected, context, true ); + FILE* textfile = fopen( "textfile.txt", "w" ); + if ( textfile ) + { + XMLStreamer streamer( textfile ); + psg->Accept( &streamer ); + fclose( textfile ); + } + textfile = fopen( "textfile.txt", "r" ); + TIXMLASSERT( textfile ); + if ( textfile ) + { + char buf[ 1024 ]; + fgets( buf, 1024, textfile ); + XMLTest( "Entity transformation: write. ", + "\n", + buf, false ); + } + fclose( textfile ); + } + + { + const char* test = ""; + + XMLDocument doc; + doc.Parse( test ); + XMLTest( "dot in names", doc.Error(), 0); + XMLTest( "dot in names", doc.FirstChildElement()->Name(), "a.elem" ); + XMLTest( "dot in names", doc.FirstChildElement()->Attribute( "xmi.version" ), "2.0" ); + } + + { + const char* test = "1.1 Start easy ignore fin thickness "; + + XMLDocument doc; + doc.Parse( test ); + + XMLText* text = doc.FirstChildElement()->FirstChildElement()->FirstChild()->ToText(); + XMLTest( "Entity with one digit.", + text->Value(), "1.1 Start easy ignore fin thickness\n", + false ); + } + + { + // DOCTYPE not preserved (950171) + // + const char* doctype = + "" + "" + "" + "" + ""; + + XMLDocument doc; + doc.Parse( doctype ); + doc.SaveFile( "test7.xml" ); + doc.DeleteChild( doc.RootElement() ); + doc.LoadFile( "test7.xml" ); + doc.Print(); + + const XMLUnknown* decl = doc.FirstChild()->NextSibling()->ToUnknown(); + XMLTest( "Correct value of unknown.", "DOCTYPE PLAY SYSTEM 'play.dtd'", decl->Value() ); + + } + + { + // Comments do not stream out correctly. + const char* doctype = + ""; + XMLDocument doc; + doc.Parse( doctype ); + + XMLComment* comment = doc.FirstChild()->ToComment(); + + XMLTest( "Comment formatting.", " Somewhat ", comment->Value() ); + } + { + // Double attributes + const char* doctype = ""; + + XMLDocument doc; + doc.Parse( doctype ); + + XMLTest( "Parsing repeated attributes.", ERROR_PARSING_ATTRIBUTE, doc.ErrorID() ); // is an error to tinyxml (didn't use to be, but caused issues) + } + + { + // Embedded null in stream. + const char* doctype = ""; + + XMLDocument doc; + doc.Parse( doctype ); + XMLTest( "Embedded null throws error.", true, doc.Error() ); + } + + { + // Empty documents should return TIXML_ERROR_PARSING_EMPTY, bug 1070717 + const char* str = " "; + XMLDocument doc; + doc.Parse( str ); + XMLTest( "Empty document error", ERROR_EMPTY_DOCUMENT, doc.ErrorID() ); + } + + { + // Low entities + XMLDocument doc; + doc.Parse( "" ); + const char result[] = { 0x0e, 0 }; + XMLTest( "Low entities.", doc.FirstChildElement()->GetText(), result ); + doc.Print(); + } + + { + // Attribute values with trailing quotes not handled correctly + XMLDocument doc; + doc.Parse( "" ); + XMLTest( "Throw error with bad end quotes.", doc.Error(), true ); + } + + { + // [ 1663758 ] Failure to report error on bad XML + XMLDocument xml; + xml.Parse(""); + XMLTest("Missing end tag at end of input", xml.Error(), true); + xml.Parse(" "); + XMLTest("Missing end tag with trailing whitespace", xml.Error(), true); + xml.Parse(""); + XMLTest("Mismatched tags", xml.ErrorID(), ERROR_MISMATCHED_ELEMENT); + } + + + { + // [ 1475201 ] TinyXML parses entities in comments + XMLDocument xml; + xml.Parse("" + "" ); + + XMLNode* e0 = xml.FirstChild(); + XMLNode* e1 = e0->NextSibling(); + XMLComment* c0 = e0->ToComment(); + XMLComment* c1 = e1->ToComment(); + + XMLTest( "Comments ignore entities.", " declarations for & ", c0->Value(), true ); + XMLTest( "Comments ignore entities.", " far & away ", c1->Value(), true ); + } + + { + XMLDocument xml; + xml.Parse( "" + "" + "" + "" + "" ); + xml.Print(); + + int count = 0; + + for( XMLNode* ele = xml.FirstChildElement( "Parent" )->FirstChild(); + ele; + ele = ele->NextSibling() ) + { + ++count; + } + + XMLTest( "Comments iterate correctly.", 3, count ); + } + + { + // trying to repro ]1874301]. If it doesn't go into an infinite loop, all is well. + unsigned char buf[] = " " ); + XMLTest( "Handle end tag whitespace", false, xml.Error() ); + } + + { + // This one must not result in an infinite loop + XMLDocument xml; + xml.Parse( "loop" ); + XMLTest( "Infinite loop test.", true, true ); + } +#endif #if defined( WIN32 ) _CrtMemCheckpoint( &endMemState ); //_CrtMemDumpStatistics( &endMemState );