Another revision of text parsing.

This commit is contained in:
Lee Thomason 2012-01-14 18:08:12 -08:00
parent 85403d8935
commit fde6a756d1
4 changed files with 59 additions and 23 deletions

View File

@ -7,6 +7,12 @@
using namespace tinyxml2; using namespace tinyxml2;
static const char LINE_FEED = (char)0x0a; // all line endings are normalized to LF
static const char LF = LINE_FEED;
static const char CARRIAGE_RETURN = (char)0x0d; // CR gets filtered out
static const char CR = CARRIAGE_RETURN;
// --------- CharBuffer ----------- // // --------- CharBuffer ----------- //
/*static*/ CharBuffer* CharBuffer::Construct( const char* in ) /*static*/ CharBuffer* CharBuffer::Construct( const char* in )
{ {
@ -91,25 +97,54 @@ const char* XMLNode::ParseText( char* p, const char* endTag, char** next )
{ {
TIXMLASSERT( endTag && *endTag ); TIXMLASSERT( endTag && *endTag );
char* start = SkipWhiteSpace( p ); char* start = p;
if ( !start ) char* q = p; // q (target) <= p (src) in same buffer.
return 0;
char endChar = *endTag; char endChar = *endTag;
p = start;
int length = strlen( endTag ); int length = strlen( endTag );
char* nextTag = 0;
// Inner loop of text parsing.
while ( *p ) { while ( *p ) {
if ( *p == endChar ) { if ( *p == endChar && strncmp( p, endTag, length ) == 0 ) {
if ( strncmp( p, endTag, length ) == 0 ) { *q = 0;
*p = 0; nextTag = p + length;
*next = p + length; break;
return start;
} }
else if ( *p == CR ) {
// CR-LF pair becomes LF
// CR alone becomes LF
// LF-CR becomes LF
if ( *(p+1) == LF ) {
p += 2;
} }
else {
++p; ++p;
} }
*q = LF;
}
else if ( *p == LF ) {
if ( *(p+1) == CR ) {
p += 2;
}
else {
++p;
}
*q = LF;
}
else {
*q = *p;
++p;
}
++q;
}
// Error? If the end tag was never found, something went wrong. (Although
// the character nextTag points at may be the terminating null.)
if ( nextTag == 0 ) {
return 0; return 0;
}
*next = nextTag;
return start;
} }
@ -129,7 +164,7 @@ XMLComment::~XMLComment()
void XMLComment::Print( FILE* fp, int depth ) void XMLComment::Print( FILE* fp, int depth )
{ {
XMLNode::Print( fp, depth ); XMLNode::Print( fp, depth );
fprintf( fp, "<!-- %s -->\n", value ); fprintf( fp, "<!--%s-->\n", value );
} }

View File

@ -68,13 +68,13 @@ protected:
} }
return false; return false;
} }
inline static int IsUTF8Continuation( char p ) { return p & 0x80; }
/* Parses text. (Not a text node.) /* Parses text. (Not a text node.)
- [ ] EOL normalization. - [ ] EOL normalization.
- [x] Trim leading whitespace - [X] Do not trim leading whitespace
- [ ] Trim trailing whitespace. - [X] Do not trim trailing whitespace.
- [ ] Leaves inner whitespace - [X] Leaves inner whitespace
- [ ] Inserts one space between lines.
*/ */
const char* ParseText( char* in, const char* endTag, char** next ); const char* ParseText( char* in, const char* endTag, char** next );

Binary file not shown.

View File

@ -7,6 +7,7 @@ using namespace tinyxml2;
int main( int argc, const char* argv ) int main( int argc, const char* argv )
{ {
#if 0
{ {
static const char* test = "<!--hello world-->"; static const char* test = "<!--hello world-->";
@ -14,14 +15,14 @@ int main( int argc, const char* argv )
doc.Parse( test ); doc.Parse( test );
doc.Print( stdout ); doc.Print( stdout );
} }
/* #endif
{ {
static const char* test = "<hello></hello>"; static const char* test = "<!--hello world\n"
" line 2\r-->";
XMLDocument doc; XMLDocument doc;
doc.Parse( test ); doc.Parse( test );
doc.Print( stdout ); doc.Print( stdout );
} }
*/
return 0; return 0;
} }