Added whitespace=collapse support. Tests work. Code needs review.

This commit is contained in:
Lee Thomason (grinliz) 2012-08-20 22:00:38 -07:00
parent 77d7f206f6
commit bc1bfb7f27
3 changed files with 90 additions and 13 deletions

View File

@ -23,10 +23,12 @@ distribution.
#include "tinyxml2.h" #include "tinyxml2.h"
#include <cstdio> #include <new> // yes, this one new style header, is in the Android SDK.
#include <cstdlib> #ifdef ANDROID_NDK
#include <new> #include <stddef.h>
#else
#include <cstddef> #include <cstddef>
#endif
using namespace tinyxml2; using namespace tinyxml2;
@ -156,6 +158,31 @@ char* StrPair::ParseName( char* p )
} }
// Collapses whitespace in the string held by this StrPair, in place:
// leading and trailing whitespace is dropped and every interior run of
// whitespace becomes a single space character. The result is never longer
// than the input, so it is written back over the same buffer and then
// re-terminated with a null character.
//
// NOTE(review): trimming the leading whitespace advances `start` itself.
// If this StrPair owned the buffer (NEEDS_DELETE), the advanced pointer
// would presumably no longer match the original allocation -- confirm that
// COLLAPSE_WHITESPACE is never combined with an owned buffer.
void StrPair::CollapseWhitespace()
{
    // Guard before touching the buffer: SkipWhiteSpace() dereferences its
    // argument, so the null check has to come first.
    if ( !start ) {
        return;
    }

    // Trim leading space.
    start = XMLUtil::SkipWhiteSpace( start );
    if ( !*start ) {
        return;     // empty, or nothing but whitespace
    }

    char* p = start;    // the read pointer
    char* q = start;    // the write pointer; never gets ahead of p

    while ( *p ) {
        if ( XMLUtil::IsWhiteSpace( *p ) ) {
            p = XMLUtil::SkipWhiteSpace( p );
            if ( *p == 0 ) {
                break;      // don't write to q; this trims the trailing space.
            }
            // An interior run of whitespace is replaced by exactly one space.
            *q = ' ';
            ++q;
        }
        *q = *p;
        ++q;
        ++p;
    }
    *q = 0;
}
const char* StrPair::GetStr() const char* StrPair::GetStr()
{ {
@ -232,6 +259,11 @@ const char* StrPair::GetStr()
} }
*q = 0; *q = 0;
} }
// The loop below has plenty going on, and this
// is a less useful mode. Break it out.
if ( flags & COLLAPSE_WHITESPACE ) {
CollapseWhitespace();
}
flags = (flags & NEEDS_DELETE); flags = (flags & NEEDS_DELETE);
} }
return start; return start;
@ -815,7 +847,11 @@ char* XMLText::ParseDeep( char* p, StrPair* )
return p; return p;
} }
else { else {
p = value.ParseText( p, "<", document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES ); int flags = document->ProcessEntities() ? StrPair::TEXT_ELEMENT : StrPair::TEXT_ELEMENT_LEAVE_ENTITIES;
if ( document->WhitespaceMode() == COLLAPSE_WHITESPACE )
flags |= StrPair::COLLAPSE_WHITESPACE;
p = value.ParseText( p, "<", flags );
if ( !p ) { if ( !p ) {
document->SetError( XML_ERROR_PARSING_TEXT, start, 0 ); document->SetError( XML_ERROR_PARSING_TEXT, start, 0 );
} }
@ -1416,11 +1452,12 @@ bool XMLElement::Accept( XMLVisitor* visitor ) const
// --------- XMLDocument ----------- // // --------- XMLDocument ----------- //
XMLDocument::XMLDocument( bool _processEntities ) : XMLDocument::XMLDocument( bool _processEntities, Whitespace _whitespace ) :
XMLNode( 0 ), XMLNode( 0 ),
writeBOM( false ), writeBOM( false ),
processEntities( _processEntities ), processEntities( _processEntities ),
errorID( 0 ), errorID( 0 ),
whitespace( _whitespace ),
errorStr1( 0 ), errorStr1( 0 ),
errorStr2( 0 ), errorStr2( 0 ),
charBuffer( 0 ) charBuffer( 0 )

View File

@ -24,11 +24,21 @@ distribution.
#ifndef TINYXML2_INCLUDED #ifndef TINYXML2_INCLUDED
#define TINYXML2_INCLUDED #define TINYXML2_INCLUDED
#ifdef ANDROID_NDK
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#else
#include <cctype> #include <cctype>
#include <climits> #include <climits>
#include <cstdio> #include <cstdio>
#include <cstdlib>
#include <cstring> #include <cstring>
#include <cstdarg> #include <cstdarg>
#endif
/* /*
TODO: intern strings instead of allocation. TODO: intern strings instead of allocation.
@ -112,6 +122,7 @@ public:
enum { enum {
NEEDS_ENTITY_PROCESSING = 0x01, NEEDS_ENTITY_PROCESSING = 0x01,
NEEDS_NEWLINE_NORMALIZATION = 0x02, NEEDS_NEWLINE_NORMALIZATION = 0x02,
COLLAPSE_WHITESPACE = 0x04,
TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION, TEXT_ELEMENT = NEEDS_ENTITY_PROCESSING | NEEDS_NEWLINE_NORMALIZATION,
TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION, TEXT_ELEMENT_LEAVE_ENTITIES = NEEDS_NEWLINE_NORMALIZATION,
@ -140,6 +151,7 @@ public:
private: private:
void Reset(); void Reset();
void CollapseWhitespace();
enum { enum {
NEEDS_FLUSH = 0x100, NEEDS_FLUSH = 0x100,
@ -365,6 +377,7 @@ public:
// correct, but simple, and usually works. // correct, but simple, and usually works.
static const char* SkipWhiteSpace( const char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<const unsigned char*>(p) ) ) { ++p; } return p; } static const char* SkipWhiteSpace( const char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<const unsigned char*>(p) ) ) { ++p; } return p; }
static char* SkipWhiteSpace( char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<unsigned char*>(p) ) ) { ++p; } return p; } static char* SkipWhiteSpace( char* p ) { while( !IsUTF8Continuation(*p) && isspace( *reinterpret_cast<unsigned char*>(p) ) ) { ++p; } return p; }
// Single-character counterpart of SkipWhiteSpace(): reports whether `p` is
// whitespace according to isspace(). Any byte that IsUTF8Continuation()
// flags is never treated as whitespace. The cast to unsigned char keeps
// negative char values from being handed to isspace().
static bool IsWhiteSpace( char p ) {
    if ( IsUTF8Continuation( p ) ) {
        return false;
    }
    return isspace( static_cast<unsigned char>( p ) ) != 0;
}
inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX ) { inline static bool StringEqual( const char* p, const char* q, int nChar=INT_MAX ) {
int n = 0; int n = 0;
@ -1031,6 +1044,12 @@ private:
}; };
// Whitespace handling mode for text parsed by an XMLDocument; it is passed
// to the XMLDocument constructor and read back through WhitespaceMode().
// These are namespace-level mode values and are distinct from the
// StrPair::COLLAPSE_WHITESPACE flag bit of the same name.
enum Whitespace {
    // Default mode: the collapse step is not requested for text nodes.
    PRESERVE_WHITESPACE = 0,
    // Text is trimmed at both ends and each interior run of whitespace is
    // reduced to a single space.
    COLLAPSE_WHITESPACE = 1
};
/** A Document binds together all the functionality. /** A Document binds together all the functionality.
It can be saved, loaded, and printed to the screen. It can be saved, loaded, and printed to the screen.
All Nodes are connected and allocated to a Document. All Nodes are connected and allocated to a Document.
@ -1041,7 +1060,7 @@ class XMLDocument : public XMLNode
friend class XMLElement; friend class XMLElement;
public: public:
/// constructor /// constructor
XMLDocument( bool processEntities = true ); XMLDocument( bool processEntities = true, Whitespace = PRESERVE_WHITESPACE );
~XMLDocument(); ~XMLDocument();
virtual XMLDocument* ToDocument() { return this; } virtual XMLDocument* ToDocument() { return this; }
@ -1087,6 +1106,7 @@ public:
int SaveFile( FILE* ); int SaveFile( FILE* );
bool ProcessEntities() const { return processEntities; } bool ProcessEntities() const { return processEntities; }
Whitespace WhitespaceMode() const { return whitespace; }
/** /**
Returns true if this document has a leading Byte Order Mark of UTF8. Returns true if this document has a leading Byte Order Mark of UTF8.
@ -1189,6 +1209,7 @@ private:
bool writeBOM; bool writeBOM;
bool processEntities; bool processEntities;
int errorID; int errorID;
Whitespace whitespace;
const char* errorStr1; const char* errorStr1;
const char* errorStr2; const char* errorStr2;
char* charBuffer; char* charBuffer;

View File

@ -938,6 +938,25 @@ int main( int /*argc*/, const char ** /*argv*/ )
XMLTest( "QueryBoolText", boolValue, true, false ); XMLTest( "QueryBoolText", boolValue, true, false );
} }
// ----------- Whitespace ------------
{
// Whitespace-collapse check: three sibling elements carry the same words
// with spaces and newlines placed differently (at the ends and between
// words), and with &apos; entities in the text.
const char* xml = "<element>"
"<a> This \nis &apos; text &apos; </a>"
"<b> This is &apos; text &apos; \n</b>"
"<c>This is &apos; \n\n text &apos;</c>"
"</element>";
// First argument enables entity processing; second selects collapse mode.
XMLDocument doc( true, COLLAPSE_WHITESPACE );
doc.Parse( xml );
const XMLElement* element = doc.FirstChildElement();
// Walk <a>, <b>, <c>: each one is checked against the same collapsed string.
for( const XMLElement* parent = element->FirstChildElement();
parent;
parent = parent->NextSiblingElement() )
{
// NOTE(review): XMLTest presumably compares the expected string (2nd
// argument) with the text actually found (3rd) -- confirm against its
// definition elsewhere in this file.
XMLTest( "Whitespace collapse", "This is ' text '", parent->GetText() );
}
}
// ----------- Performance tracking -------------- // ----------- Performance tracking --------------
{ {