Another revision of text parsing: line endings are normalized to LF, and leading/trailing whitespace is no longer trimmed.

This commit is contained in:
Lee Thomason 2012-01-14 18:08:12 -08:00
parent 85403d8935
commit fde6a756d1
4 changed files with 59 additions and 23 deletions

View File

@ -7,6 +7,12 @@
using namespace tinyxml2;
// Line-ending characters recognized by the text parser.
static const char LINE_FEED       = '\x0a';	// every line ending is normalized to a single LF
static const char CARRIAGE_RETURN = '\x0d';	// CR is filtered out of parsed text
// Short aliases for the two characters above.
static const char LF = LINE_FEED;
static const char CR = CARRIAGE_RETURN;
// --------- CharBuffer ----------- //
/*static*/ CharBuffer* CharBuffer::Construct( const char* in )
{
@ -91,25 +97,54 @@ const char* XMLNode::ParseText( char* p, const char* endTag, char** next )
{
// Scans the text starting at p for endTag, rewriting the text in place so that
// line endings are normalized to LF. On success the text is null-terminated,
// *next is set to the first character after endTag, and the start of the text
// is returned. Returns 0 when endTag is never found.
// NOTE(review): this diff view carries no +/- markers, so statements from the
// previous implementation (SkipWhiteSpace / early return inside the loop) appear
// interleaved with the new copy loop, including duplicate declarations of
// start, endChar and length. Presumably the SkipWhiteSpace version is the
// removed side -- confirm against the file before relying on any single line.
TIXMLASSERT( endTag && *endTag );
char* start = SkipWhiteSpace( p );
if ( !start )
return 0;
char endChar = *endTag;
p = start;
int length = strlen( endTag );
char* start = p;
char* q = p; // q (target) <= p (src) in same buffer.
char endChar = *endTag;
// NOTE(review): strlen returns size_t; storing it in an int narrows the value.
int length = strlen( endTag );
// Stays 0 unless endTag is found; checked after the loop to detect failure.
char* nextTag = 0;
// Inner loop of text parsing.
while ( *p ) {
if ( *p == endChar ) {
if ( strncmp( p, endTag, length ) == 0 ) {
*p = 0;
*next = p + length;
return start;
}
// Cheap first-character test before the full strncmp against endTag.
if ( *p == endChar && strncmp( p, endTag, length ) == 0 ) {
// Terminate the rewritten text at the write cursor (q), not the read cursor (p).
*q = 0;
nextTag = p + length;
break;
}
++p;
else if ( *p == CR ) {
// CR-LF pair becomes LF
// CR alone becomes LF
// LF-CR becomes LF
// Reading p+1 assumes the buffer is null-terminated, as the while ( *p ) loop
// already requires: *p is non-zero here, so p+1 is at worst the terminator.
if ( *(p+1) == LF ) {
p += 2;
}
else {
++p;
}
*q = LF;
}
else if ( *p == LF ) {
// An LF followed by CR is consumed as a pair; either way a single LF is written.
if ( *(p+1) == CR ) {
p += 2;
}
else {
++p;
}
*q = LF;
}
else {
// Ordinary character: copy it through unchanged.
*q = *p;
++p;
}
// Each branch above wrote exactly one character at q.
++q;
}
return 0;
// Error? If we don't have a text tag, something went wrong. (Although
// what the nextTag points at may be null.)
if ( nextTag == 0 ) {
return 0;
}
*next = nextTag;
return start;
}
@ -129,7 +164,7 @@ XMLComment::~XMLComment()
// Prints this comment to fp: calls XMLNode::Print( fp, depth ) first (what that
// emits is not visible here), then writes value wrapped in "<!--" ... "-->"
// followed by a newline.
// NOTE(review): two fprintf calls are shown because this diff view has no +/-
// markers; presumably the padded "<!-- %s -->" form is the removed line and the
// unpadded "<!--%s-->" form is its replacement -- confirm against the file.
void XMLComment::Print( FILE* fp, int depth )
{
XMLNode::Print( fp, depth );
fprintf( fp, "<!-- %s -->\n", value );
fprintf( fp, "<!--%s-->\n", value );
}

View File

@ -68,13 +68,13 @@ protected:
}
return false;
}
// Returns a non-zero value (0x80) when the high bit of the byte is set, 0 otherwise.
// Only the top bit is tested, so the result is non-zero for every non-ASCII byte.
inline static int IsUTF8Continuation( char p )
{
	const int HIGH_BIT = 0x80;
	return p & HIGH_BIT;
}
/* Parses text. (Not a text node.)
- [X] EOL normalization. (CR-LF, LF-CR and a lone CR all become LF.)
- [x] Trim leading whitespace
- [ ] Trim trailing whitespace.
- [ ] Leaves inner whitespace
- [ ] Inserts one space between lines.
- [X] Do not trim leading whitespace
- [X] Do not trim trailing whitespace.
- [X] Leaves inner whitespace
*/
const char* ParseText( char* in, const char* endTag, char** next );

Binary file not shown.

View File

@ -7,6 +7,7 @@ using namespace tinyxml2;
// Test driver: parses a hard-coded XML snippet with XMLDocument::Parse and prints
// the document to stdout.
// NOTE(review): the second parameter is declared as a single `const char*`; the
// standard forms of main take an array of pointers (`char* argv[]`). Confirm and fix.
// NOTE(review): this diff view has no +/- markers and contains a hunk header in the
// middle of the body, so the block-comment delimiters, the "<hello></hello>" literal
// and the #if 0 / #endif pair below are a mix of removed and added lines -- presumably
// the preprocessor guard and the two-line comment test are the added side; confirm.
int main( int argc, const char* argv )
{
#if 0
{
static const char* test = "<!--hello world-->";
@ -14,14 +15,14 @@ int main( int argc, const char* argv )
doc.Parse( test );
doc.Print( stdout );
}
/*
#endif
{
static const char* test = "<hello></hello>";
static const char* test = "<!--hello world\n"
" line 2\r-->";
XMLDocument doc;
doc.Parse( test );
doc.Print( stdout );
}
*/
return 0;
}