new iteration of the parser to handle internal comments

This commit is contained in:
Lee Thomason 2014-10-11 16:49:58 -07:00
parent 282285a069
commit c3cdfa6349
4 changed files with 51 additions and 23 deletions

View File

@ -2,6 +2,7 @@
<!DOCTYPE Dictionary [
<!-- Test of the ability to parse internally defined DTD -->
<!-- Test for comment nastiness: <]> -->
<!ELEMENT Dictionary (Storage+) >
<!ELEMENT Storage (WordFile+, WordList?) >
<!ELEMENT Growth EMPTY >

View File

@ -1036,19 +1036,41 @@ char* XMLDtd::ParseDeep( char* p, StrPair* )
{
// Dtd parses as text.
char* start = p;
bool endFound = false;
int brackets = 0; // tracks whether we are inside an inline DTD
// Find closing '>', skipping over any local definition contained between '[' and ']'
while(*p) {
// Skip over comments.
if (strncmp(p, "<!--", 4) == 0) {
p = strstr(p+4, "-->");
if (p) {
p += 3;
}
}
if (!p) break;
while( *p && *p != '>' && *p != '[') ++p;
if ( *p == '[' )
{
while( *p && *p != ']' ) ++p;
while( *p && *p != '>' ) ++p;
}
// Skip over defs
if (brackets && strncmp(p, "<!", 2) == 0 ) {
p = strstr(p+2, ">");
if (p) {
p++;
}
}
if (!p) break;
if ( *p != '>' ) {
_document->SetError( XML_ERROR_PARSING_UNKNOWN, start, 0 );
if (brackets == 0 && *p == '>' ) {
endFound = true;
break;
}
if (*p == '[' )
++brackets;
if (*p == ']' )
--brackets;
++p;
}
if (!endFound || !p || !*p) {
_document->SetError( XML_ERROR_PARSING_DTD, start, 0 );
}
_value.Set(start, p, StrPair::NEEDS_NEWLINE_NORMALIZATION );
@ -1069,8 +1091,8 @@ XMLNode* XMLDtd::ShallowClone( XMLDocument* doc ) const
// Reports whether `compare` can be viewed as a DTD node (via ToDtd()) whose
// value string equals this node's value, as judged by XMLUtil::StringEqual.
// A null result from ToDtd() short-circuits the expression to false.
bool XMLDtd::ShallowEqual( const XMLNode* compare ) const
{
const XMLDtd* unknown = compare->ToDtd();
return ( unknown && XMLUtil::StringEqual( unknown->Value(), Value() ));
// NOTE(review): the two statements below repeat the two above with the local
// renamed from `unknown` to `dtd`. Sitting after a `return`, they can never
// execute. This looks like the before/after lines of a diff shown together --
// confirm which pair belongs in the real file.
const XMLDtd* dtd = compare->ToDtd();
return ( dtd && XMLUtil::StringEqual( dtd->Value(), Value() ));
}
@ -1658,6 +1680,7 @@ const char* XMLDocument::_errorNames[XML_ERROR_COUNT] = {
"XML_ERROR_PARSING_COMMENT",
"XML_ERROR_PARSING_DECLARATION",
"XML_ERROR_PARSING_UNKNOWN",
"XML_ERROR_PARSING_DTD",
"XML_ERROR_EMPTY_DOCUMENT",
"XML_ERROR_MISMATCHED_ELEMENT",
"XML_ERROR_PARSING",

View File

@ -485,7 +485,7 @@ public:
}
};
// WARNING: must match XMLErrorNames[]
// WARNING: must match XMLDocument::_errorNames[]
enum XMLError {
XML_SUCCESS = 0,
XML_NO_ERROR = 0,
@ -503,6 +503,7 @@ enum XMLError {
XML_ERROR_PARSING_COMMENT,
XML_ERROR_PARSING_DECLARATION,
XML_ERROR_PARSING_UNKNOWN,
XML_ERROR_PARSING_DTD,
XML_ERROR_EMPTY_DOCUMENT,
XML_ERROR_MISMATCHED_ELEMENT,
XML_ERROR_PARSING,
@ -1004,10 +1005,13 @@ protected:
/** The <!DOCTYPE> structure can contain internal definitions that
may contain other <!xxx ...> entities. (Otherwise, these could
be handled as XMLUnknown nodes.)
It will be written back to the XML, unchanged, when the file
is saved.
may contain other <!xxx ...> entities. (Otherwise, these could
be handled as XMLUnknown nodes.)
TinyXML-2 does not interpret, enforce, or even parse the DTD.
The XMLDTD tag will be written back to the XML, unchanged,
when the file is saved.
*/
class TINYXML2_LIB XMLDtd : public XMLNode

View File

@ -486,18 +486,18 @@ int main( int argc, const char ** argv )
doc.PrintError();
XMLTest( "Dictionary", "xml version=\"1.0\" encoding=\"UTF-8\"",
doc.FirstChild()->ToDeclaration()->Value() );
doc.FirstChild()->ToDeclaration()->Value() );
XMLTest( "Dictionary", true, doc.FirstChild()->NextSibling()->ToDtd() ? true : false );
XMLTest( "Dictionary", "500M",
doc.LastChild()->LastChild()->FirstChild()->ToElement()->Attribute("size") );
XMLTest( "Dictionary", "500M",
doc.LastChild()->LastChild()->FirstChild()->ToElement()->Attribute("size") );
XMLDocument doc2;
doc2.LoadFile( "resources/out/dictionaryout.xml" );
XMLTest( "Dictionary-out", "xml version=\"1.0\" encoding=\"UTF-8\"",
doc2.FirstChild()->ToDeclaration()->Value() );
doc2.FirstChild()->ToDeclaration()->Value() );
XMLTest( "Dictionary-out", true, doc2.FirstChild()->NextSibling()->ToDtd() ? true : false );
XMLTest( "Dictionary", "500M",
doc2.LastChild()->LastChild()->FirstChild()->ToElement()->Attribute("size") );
XMLTest( "Dictionary", "500M",
doc2.LastChild()->LastChild()->FirstChild()->ToElement()->Attribute("size") );
}