new iteration of the parser to handle internal comments

This commit is contained in:
Lee Thomason 2014-10-11 16:49:58 -07:00
parent 282285a069
commit c3cdfa6349
4 changed files with 51 additions and 23 deletions

View File

@ -2,6 +2,7 @@
<!DOCTYPE Dictionary [ <!DOCTYPE Dictionary [
<!-- Test of the ability to parse internally defined DTD --> <!-- Test of the ability to parse internally defined DTD -->
<!-- Test for comment nastiness: <]> -->
<!ELEMENT Dictionary (Storage+) > <!ELEMENT Dictionary (Storage+) >
<!ELEMENT Storage (WordFile+, WordList?) > <!ELEMENT Storage (WordFile+, WordList?) >
<!ELEMENT Growth EMPTY > <!ELEMENT Growth EMPTY >

View File

@ -1036,19 +1036,41 @@ char* XMLDtd::ParseDeep( char* p, StrPair* )
{ {
// Dtd parses as text. // Dtd parses as text.
char* start = p; char* start = p;
bool endFound = false;
int brackets = 0; // tracks whether we are inside an inline DTD
// Find closing '>', skipping over any local definition contained between '[' and ']' while(*p) {
// Skip over comments.
if (strncmp(p, "<!--", 4) == 0) {
p = strstr(p+4, "-->");
if (p) {
p += 3;
}
}
if (!p) break;
while( *p && *p != '>' && *p != '[') ++p; // Skip over defs
if (brackets && strncmp(p, "<!", 2) == 0 ) {
p = strstr(p+2, ">");
if (p) {
p++;
}
}
if (!p) break;
if ( *p == '[' ) if (brackets == 0 && *p == '>' ) {
{ endFound = true;
while( *p && *p != ']' ) ++p; break;
while( *p && *p != '>' ) ++p; }
if (*p == '[' )
++brackets;
if (*p == ']' )
--brackets;
++p;
} }
if ( *p != '>' ) { if (!endFound || !p || !*p) {
_document->SetError( XML_ERROR_PARSING_UNKNOWN, start, 0 ); _document->SetError( XML_ERROR_PARSING_DTD, start, 0 );
} }
_value.Set(start, p, StrPair::NEEDS_NEWLINE_NORMALIZATION ); _value.Set(start, p, StrPair::NEEDS_NEWLINE_NORMALIZATION );
@ -1069,8 +1091,8 @@ XMLNode* XMLDtd::ShallowClone( XMLDocument* doc ) const
bool XMLDtd::ShallowEqual( const XMLNode* compare ) const bool XMLDtd::ShallowEqual( const XMLNode* compare ) const
{ {
const XMLDtd* unknown = compare->ToDtd(); const XMLDtd* dtd = compare->ToDtd();
return ( unknown && XMLUtil::StringEqual( unknown->Value(), Value() )); return ( dtd && XMLUtil::StringEqual( dtd->Value(), Value() ));
} }
@ -1658,6 +1680,7 @@ const char* XMLDocument::_errorNames[XML_ERROR_COUNT] = {
"XML_ERROR_PARSING_COMMENT", "XML_ERROR_PARSING_COMMENT",
"XML_ERROR_PARSING_DECLARATION", "XML_ERROR_PARSING_DECLARATION",
"XML_ERROR_PARSING_UNKNOWN", "XML_ERROR_PARSING_UNKNOWN",
"XML_ERROR_PARSING_DTD",
"XML_ERROR_EMPTY_DOCUMENT", "XML_ERROR_EMPTY_DOCUMENT",
"XML_ERROR_MISMATCHED_ELEMENT", "XML_ERROR_MISMATCHED_ELEMENT",
"XML_ERROR_PARSING", "XML_ERROR_PARSING",

View File

@ -485,7 +485,7 @@ public:
} }
}; };
// WARNING: must match XMLErrorNames[] // WARNING: must match XMLDocument::_errorNames[]
enum XMLError { enum XMLError {
XML_SUCCESS = 0, XML_SUCCESS = 0,
XML_NO_ERROR = 0, XML_NO_ERROR = 0,
@ -503,6 +503,7 @@ enum XMLError {
XML_ERROR_PARSING_COMMENT, XML_ERROR_PARSING_COMMENT,
XML_ERROR_PARSING_DECLARATION, XML_ERROR_PARSING_DECLARATION,
XML_ERROR_PARSING_UNKNOWN, XML_ERROR_PARSING_UNKNOWN,
XML_ERROR_PARSING_DTD,
XML_ERROR_EMPTY_DOCUMENT, XML_ERROR_EMPTY_DOCUMENT,
XML_ERROR_MISMATCHED_ELEMENT, XML_ERROR_MISMATCHED_ELEMENT,
XML_ERROR_PARSING, XML_ERROR_PARSING,
@ -1006,8 +1007,11 @@ protected:
/** The <!DOCTYPE> structure can contain internal definitions that /** The <!DOCTYPE> structure can contain internal definitions that
may contain other <!xxx ...> entities. (Otherwise, these could may contain other <!xxx ...> entities. (Otherwise, these could
be handled as XMLUnknown nodes.) be handled as XMLUnknown nodes.)
It will be written back to the XML, unchanged, when the file
is saved. TinyXML-2 does not interpret, enforce, or even parse the DTD.
The XMLDTD tag will be written back to the XML, unchanged,
when the file is saved.
*/ */
class TINYXML2_LIB XMLDtd : public XMLNode class TINYXML2_LIB XMLDtd : public XMLNode