tinyxml2/tinyxml2.cpp

2792 lines
71 KiB
C++
Raw Permalink Normal View History

2012-02-26 05:11:20 +00:00
/*
Original code by Lee Thomason (www.grinninglizard.com)
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any
damages arising from the use of this software.
Permission is granted to anyone to use this software for any
purpose, including commercial applications, and to alter it and
redistribute it freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product documentation
would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and
must not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*/
2012-02-25 05:50:50 +00:00
2011-12-29 03:42:49 +00:00
#include "tinyxml2.h"
#include <new> // yes, this one new style header, is in the Android SDK.
2016-03-25 13:01:15 +00:00
#if defined(ANDROID_NDK) || defined(__BORLANDC__) || defined(__QNXNTO__)
# include <stddef.h>
2015-07-30 14:02:26 +00:00
# include <stdarg.h>
#else
# include <cstddef>
2015-07-30 14:02:26 +00:00
# include <cstdarg>
#endif
2011-12-29 03:42:49 +00:00
2015-06-12 05:52:08 +00:00
#if defined(_MSC_VER) && (_MSC_VER >= 1400 ) && (!defined WINCE)
// Microsoft Visual Studio, version 2005 and higher. Not WinCE.
2015-06-12 05:52:08 +00:00
/*int _snprintf_s(
char *buffer,
size_t sizeOfBuffer,
size_t count,
const char *format [,
argument] ...
);*/
2015-08-15 16:17:27 +00:00
static inline int TIXML_SNPRINTF( char* buffer, size_t size, const char* format, ... )
2015-06-12 05:52:08 +00:00
{
va_list va;
va_start( va, format );
int result = vsnprintf_s( buffer, size, _TRUNCATE, format, va );
va_end( va );
return result;
}
2015-08-15 16:17:27 +00:00
static inline int TIXML_VSNPRINTF( char* buffer, size_t size, const char* format, va_list va )
2015-06-12 05:52:08 +00:00
{
int result = vsnprintf_s( buffer, size, _TRUNCATE, format, va );
return result;
}
#define TIXML_VSCPRINTF _vscprintf
#define TIXML_SSCANF sscanf_s
#elif defined _MSC_VER
// Microsoft Visual Studio 2003 and earlier or WinCE
#define TIXML_SNPRINTF _snprintf
#define TIXML_VSNPRINTF _vsnprintf
#define TIXML_SSCANF sscanf
2015-06-19 23:52:40 +00:00
#if (_MSC_VER < 1400 ) && (!defined WINCE)
2015-06-12 05:52:08 +00:00
// Microsoft Visual Studio 2003 and not WinCE.
#define TIXML_VSCPRINTF _vscprintf // VS2003's C runtime has this, but VC6 C runtime or WinCE SDK doesn't have.
#else
// WinCE only (the enclosing #if/#elif conditions leave no other case): _vscprintf is unavailable, so emulate it.
2015-08-15 16:17:27 +00:00
static inline int TIXML_VSCPRINTF( const char* format, va_list va )
2015-06-12 05:52:08 +00:00
{
int len = 512;
for (;;) {
len = len*2;
char* str = new char[len]();
const int required = _vsnprintf(str, len, format, va);
delete[] str;
if ( required != -1 ) {
2015-07-27 14:11:51 +00:00
TIXMLASSERT( required >= 0 );
2015-06-12 05:52:08 +00:00
len = required;
break;
}
}
2015-07-27 14:11:51 +00:00
TIXMLASSERT( len >= 0 );
2015-06-12 05:52:08 +00:00
return len;
}
#endif
#else
// GCC version 3 and higher
//#warning( "Using sn* functions." )
#define TIXML_SNPRINTF snprintf
#define TIXML_VSNPRINTF vsnprintf
2015-08-15 16:17:27 +00:00
static inline int TIXML_VSCPRINTF( const char* format, va_list va )
2015-06-12 05:52:08 +00:00
{
int len = vsnprintf( 0, 0, format, va );
2015-07-27 14:11:51 +00:00
TIXMLASSERT( len >= 0 );
2015-06-12 05:52:08 +00:00
return len;
}
#define TIXML_SSCANF sscanf
#endif
// Line-ending characters, with short aliases used by the text-normalising code.
static const char LINE_FEED       = static_cast<char>( 0x0a );   // all line endings are normalized to LF
static const char LF              = LINE_FEED;
static const char CARRIAGE_RETURN = static_cast<char>( 0x0d );   // CR gets filtered out
static const char CR              = CARRIAGE_RETURN;

// Quote characters referenced by the entity table.
static const char SINGLE_QUOTE    = '\'';
static const char DOUBLE_QUOTE    = '\"';
2012-01-15 02:08:12 +00:00
2012-02-24 06:27:28 +00:00
// Bunch of unicode info at:
// http://www.unicode.org/faq/utf_bom.html
// ef bb bf (Microsoft "lead bytes") - designates UTF-8
2012-02-21 17:08:12 +00:00
2012-02-24 06:27:28 +00:00
// The three bytes, in order, that XMLUtil::ReadBOM() compares against the
// start of its input to detect a byte-order mark.
static const unsigned char TIXML_UTF_LEAD_0 = 0xefU;
static const unsigned char TIXML_UTF_LEAD_1 = 0xbbU;
static const unsigned char TIXML_UTF_LEAD_2 = 0xbfU;
2012-02-21 17:08:12 +00:00
namespace tinyxml2
{
2012-01-26 01:44:30 +00:00
// Describes one named entity: `pattern` is the text that appears between
// '&' and ';', `length` is the character count of that text, and `value`
// is the single character the entity stands for.
struct Entity {
    const char* pattern;
    int         length;
    char        value;
};

// Lookup table of the named entities this file converts.
static const int NUM_ENTITIES = 5;
static const Entity entities[NUM_ENTITIES] = {
    { "quot", 4, DOUBLE_QUOTE },
    { "amp",  3, '&'          },
    { "apos", 4, SINGLE_QUOTE },
    { "lt",   2, '<'          },
    { "gt",   2, '>'          }
};
2012-01-15 02:08:12 +00:00
// Destructor: delegates to Reset(), which frees the buffer when the
// NEEDS_DELETE flag is set and clears the members.
StrPair::~StrPair()
{
Reset();
}
2014-11-28 06:31:11 +00:00
// Moves this StrPair's flags and pointers into `other` and leaves this
// object empty - in effect an assignment that transfers ownership
// (as with auto_ptr). Transferring to itself is a no-op.
void StrPair::TransferTo( StrPair* other )
{
    if ( this != other ) {
        // The destination is expected to be empty on entry.
        TIXMLASSERT( other != 0 );
        TIXMLASSERT( other->_flags == 0 );
        TIXMLASSERT( other->_start == 0 );
        TIXMLASSERT( other->_end == 0 );

        other->Reset();

        // Hand everything over ...
        StrPair& dest = *other;
        dest._flags = _flags;
        dest._start = _start;
        dest._end   = _end;

        // ... and forget it here so the buffer has a single owner.
        _flags = 0;
        _start = 0;
        _end   = 0;
    }
}
2017-09-20 00:54:31 +00:00
// Returns the StrPair to its empty state, freeing the buffer first when
// the NEEDS_DELETE flag marks it as owned.
void StrPair::Reset()
{
    const bool ownsBuffer = ( _flags & NEEDS_DELETE ) != 0;
    if ( ownsBuffer ) {
        delete [] _start;
    }
    _start = 0;
    _end   = 0;
    _flags = 0;
}
// Replaces the current contents with a heap-allocated copy of `str`.
//   str   - NUL-terminated text to copy; must not be null.
//   flags - stored together with NEEDS_DELETE, which marks the copy as owned.
void StrPair::SetStr( const char* str, int flags )
{
    TIXMLASSERT( str );

    // Duplicate the input BEFORE releasing the current buffer: `str` may
    // point into the buffer this StrPair already owns (for instance a
    // pointer obtained earlier from GetStr()), and Reset() frees that
    // buffer. Copying first keeps the source alive while it is read.
    const size_t len = strlen( str );
    char* const copy = new char[ len+1 ];
    memcpy( copy, str, len+1 );      // includes the terminating NUL

    Reset();
    TIXMLASSERT( _start == 0 );
    _start = copy;
    _end = copy + len;
    _flags = flags | NEEDS_DELETE;
}
// Scans forward from `p` for the first occurrence of `endTag`.
// On a match the text before the tag is stored via Set() with `strFlags`
// and the position just past the tag is returned. Every '\n' passed on the
// way increments *curLineNumPtr. Returns 0 when the terminating NUL is
// reached without finding the tag.
char* StrPair::ParseText( char* p, const char* endTag, int strFlags, int* curLineNumPtr )
{
    TIXMLASSERT( p );
    TIXMLASSERT( endTag && *endTag );
    TIXMLASSERT(curLineNumPtr);

    char* const textBegin = p;
    const char firstTagChar = *endTag;
    const size_t tagLength = strlen( endTag );

    for ( char* cursor = p; *cursor; ++cursor ) {
        // Cheap first-character test before the full comparison.
        if ( *cursor == firstTagChar && strncmp( cursor, endTag, tagLength ) == 0 ) {
            Set( textBegin, cursor, strFlags );
            return cursor + tagLength;
        }
        if ( *cursor == '\n' ) {
            ++(*curLineNumPtr);
        }
    }
    return 0;
}
// Reads a name starting at `p`: one character accepted by
// XMLUtil::IsNameStartChar followed by any number accepted by
// XMLUtil::IsNameChar. The name is stored via Set() with flags 0 and the
// position after it is returned. Returns 0 for a null pointer, an empty
// string, or an invalid first character.
char* StrPair::ParseName( char* p )
{
    const bool hasInput = ( p != 0 ) && ( *p != 0 );
    if ( !hasInput || !XMLUtil::IsNameStartChar( *p ) ) {
        return 0;
    }

    char* nameEnd = p + 1;
    for ( ; *nameEnd && XMLUtil::IsNameChar( *nameEnd ); ++nameEnd ) {
        // advance over the remaining name characters
    }

    Set( p, nameEnd, 0 );
    return nameEnd;
}
// Rewrites the string in place so that leading and trailing whitespace is
// dropped and each interior whitespace run becomes a single ' '.
void StrPair::CollapseWhitespace()
{
    // _start is moved forward below; doing that to an owned buffer would
    // make the later delete[] undefined behavior.
    TIXMLASSERT( ( _flags & NEEDS_DELETE ) == 0 );

    // Leading whitespace is removed by advancing the start pointer.
    _start = XMLUtil::SkipWhiteSpace( _start, 0 );
    if ( *_start == 0 ) {
        return;
    }

    const char* src = _start;   // read position
    char* dst = _start;         // write position, never ahead of src
    while ( *src ) {
        if ( XMLUtil::IsWhiteSpace( *src ) ) {
            src = XMLUtil::SkipWhiteSpace( src, 0 );
            if ( !*src ) {
                // The run reached the end: emit nothing, trimming the tail.
                break;
            }
            *dst++ = ' ';
        }
        *dst++ = *src++;
    }
    *dst = 0;
}
2012-02-21 17:08:12 +00:00
// Returns the string as NUL-terminated text.
//
// The first call after the span was set (NEEDS_FLUSH) terminates the span
// at _end and rewrites it in place according to the remaining flags:
//   NEEDS_NEWLINE_NORMALIZATION  - CR, CR-LF and LF-CR each become one LF
//   NEEDS_ENTITY_PROCESSING      - named entities from the entity table and
//                                  numeric character references are decoded
//   NEEDS_WHITESPACE_COLLAPSING  - handled afterwards by CollapseWhitespace()
// Afterwards only NEEDS_DELETE is kept, so later calls return the cached
// result without further processing.
const char* StrPair::GetStr()
{
    TIXMLASSERT( _start );
    TIXMLASSERT( _end );
    if ( _flags & NEEDS_FLUSH ) {
        *_end = 0;
        _flags ^= NEEDS_FLUSH;

        if ( _flags ) {
            const char* p = _start;   // the read pointer
            char* q = _start;         // the write pointer (never ahead of p)

            while( p < _end ) {
                if ( (_flags & NEEDS_NEWLINE_NORMALIZATION) && *p == CR ) {
                    // CR-LF pair becomes LF
                    // CR alone becomes LF
                    // LF-CR becomes LF
                    if ( *(p+1) == LF ) {
                        p += 2;
                    }
                    else {
                        ++p;
                    }
                    *q = LF;
                    ++q;
                }
                else if ( (_flags & NEEDS_NEWLINE_NORMALIZATION) && *p == LF ) {
                    if ( *(p+1) == CR ) {
                        p += 2;
                    }
                    else {
                        ++p;
                    }
                    *q = LF;
                    ++q;
                }
                else if ( (_flags & NEEDS_ENTITY_PROCESSING) && *p == '&' ) {
                    // Entities handled by tinyXML2:
                    // - special entities in the entity table [in/out]
                    // - numeric character reference [in]
                    //   &#20013; or &#x4e2d;

                    if ( *(p+1) == '#' ) {
                        const int buflen = 10;
                        char buf[buflen] = { 0 };
                        int len = 0;
                        char* adjusted = const_cast<char*>( XMLUtil::GetCharacterRef( p, buf, &len ) );
                        if ( adjusted == 0 ) {
                            // Not a usable reference: keep the '&' literally.
                            *q = *p;
                            ++p;
                            ++q;
                        }
                        else {
                            TIXMLASSERT( 0 <= len && len <= buflen );
                            TIXMLASSERT( q + len <= adjusted );
                            p = adjusted;
                            memcpy( q, buf, len );
                            q += len;
                        }
                    }
                    else {
                        bool entityFound = false;
                        for( int i = 0; i < NUM_ENTITIES; ++i ) {
                            const Entity& entity = entities[i];
                            if ( strncmp( p + 1, entity.pattern, entity.length ) == 0
                                    && *( p + entity.length + 1 ) == ';' ) {
                                // Found an entity - convert.
                                *q = entity.value;
                                ++q;
                                p += entity.length + 2;
                                entityFound = true;
                                break;
                            }
                        }
                        if ( !entityFound ) {
                            // fixme: treat as error?
                            // Unknown entity: keep the '&' literally. The copy
                            // is required because q may already trail p after
                            // an earlier replacement shortened the text;
                            // advancing q without writing would leave a stale
                            // byte in the output.
                            *q = *p;
                            ++p;
                            ++q;
                        }
                    }
                }
                else {
                    *q = *p;
                    ++p;
                    ++q;
                }
            }
            *q = 0;
        }
        // The loop above has plenty going on, and this
        // is a less useful mode. Break it out.
        if ( _flags & NEEDS_WHITESPACE_COLLAPSING ) {
            CollapseWhitespace();
        }
        _flags = (_flags & NEEDS_DELETE);
    }
    TIXMLASSERT( _start );
    return _start;
}
2012-02-21 17:08:12 +00:00
// --------- XMLUtil ----------- //
2016-12-27 06:47:25 +00:00
// Strings written by ToStr( bool, ... ) for true and false respectively.
// Both can be replaced through SetBoolSerialization().
const char* XMLUtil::writeBoolTrue = "true";
const char* XMLUtil::writeBoolFalse = "false";
2016-12-27 00:45:30 +00:00
2016-12-29 19:19:17 +00:00
void XMLUtil::SetBoolSerialization(const char* writeTrue, const char* writeFalse)
2016-12-27 00:45:30 +00:00
{
2016-12-29 19:19:17 +00:00
static const char* defTrue = "true";
2016-12-27 00:45:30 +00:00
static const char* defFalse = "false";
2016-12-29 19:19:17 +00:00
writeBoolTrue = (writeTrue) ? writeTrue : defTrue;
writeBoolFalse = (writeFalse) ? writeFalse : defFalse;
2016-12-27 00:45:30 +00:00
}
2012-02-21 17:08:12 +00:00
// Checks whether the text at `p` starts with the three lead bytes
// TIXML_UTF_LEAD_0..2. *bom receives the outcome; the return value is `p`
// advanced past the three bytes when they are present, otherwise `p` itself.
const char* XMLUtil::ReadBOM( const char* p, bool* bom )
{
    TIXMLASSERT( p );
    TIXMLASSERT( bom );

    // Compare as unsigned so the lead-byte constants match regardless of
    // whether plain char is signed. The && chain stops at the first
    // mismatch, so a short string is never read past its terminator.
    const unsigned char* bytes = reinterpret_cast<const unsigned char*>( p );
    const bool hasBom = bytes[0] == TIXML_UTF_LEAD_0
                        && bytes[1] == TIXML_UTF_LEAD_1
                        && bytes[2] == TIXML_UTF_LEAD_2;

    *bom = hasBom;
    return hasBom ? p + 3 : p;
}
2012-02-24 06:27:28 +00:00
// Encodes the code point `input` as UTF-8.
//   output - receives 1 to 4 bytes; no terminator is written.
//   length - receives the number of bytes written, or 0 when `input` is
//            0x200000 or larger (nothing is written in that case).
void XMLUtil::ConvertUTF32ToUTF8( unsigned long input, char* output, int* length )
{
    const unsigned long BYTE_MASK = 0xBF;
    const unsigned long BYTE_MARK = 0x80;
    const unsigned long FIRST_BYTE_MARK[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };

    int byteCount = 0;
    if ( input < 0x80 ) {
        byteCount = 1;
    }
    else if ( input < 0x800 ) {
        byteCount = 2;
    }
    else if ( input < 0x10000 ) {
        byteCount = 3;
    }
    else if ( input < 0x200000 ) {
        byteCount = 4;
    }
    else {
        *length = 0;    // This code won't convert this correctly anyway.
        return;
    }
    *length = byteCount;

    // Continuation bytes are produced last-to-first, each taking the low six
    // bits of what is left: bit 7 set, bit 6 cleared (10xxxxxx).
    for ( int i = byteCount - 1; i > 0; --i ) {
        output[i] = static_cast<char>( (input | BYTE_MARK) & BYTE_MASK );
        input >>= 6;
    }
    // The remaining high bits go into the lead byte together with the
    // length marker for this byte count.
    output[0] = static_cast<char>( input | FIRST_BYTE_MARK[byteCount] );
}
// Decodes a numeric character reference ("&#1234;" or "&#x4e2d;") that
// starts at `p` (pointing at the '&').
//   value  - receives the UTF-8 encoding of the referenced code point.
//   length - receives the number of bytes written to `value`; 0 when the
//            number is too large for ConvertUTF32ToUTF8() to encode.
// Returns the position after the terminating ';' when the reference is
// well formed, 0 when it is malformed (no ';', or a character that is not a
// digit of the radix), and p+1 when `p` is not followed by '#' plus at
// least one more character.
const char* XMLUtil::GetCharacterRef( const char* p, char* value, int* length )
{
    // Presume an entity, and pull it out.
    *length = 0;

    if ( *(p+1) != '#' || !*(p+2) ) {
        return p+1;
    }

    // Largest value ConvertUTF32ToUTF8() encodes; anything above it yields
    // *length == 0 there.
    static const unsigned long MAX_ENCODABLE = 0x1FFFFF;
    static const char SEMICOLON = ';';

    const bool hex = ( *(p+2) == 'x' );
    const unsigned long radix = hex ? 16 : 10;
    // The digits are walked right to left until this character is met.
    const char terminator = hex ? 'x' : '#';

    const char* q = hex ? p+3 : p+2;
    if ( !(*q) ) {
        return 0;
    }
    q = strchr( q, SEMICOLON );
    if ( !q ) {
        return 0;
    }
    TIXMLASSERT( *q == SEMICOLON );
    const ptrdiff_t delta = q-p;

    unsigned long ucs = 0;
    TIXMLASSERT( sizeof( ucs ) >= 4 );
    unsigned long mult = 1;     // radix^position, frozen once above MAX_ENCODABLE
    bool tooLarge = false;

    for ( --q; *q != terminator; --q ) {
        unsigned long digit = 0;
        if ( *q >= '0' && *q <= '9' ) {
            digit = *q - '0';
        }
        else if ( hex && *q >= 'a' && *q <= 'f' ) {
            digit = *q - 'a' + 10;
        }
        else if ( hex && *q >= 'A' && *q <= 'F' ) {
            digit = *q - 'A' + 10;
        }
        else {
            return 0;
        }
        TIXMLASSERT( digit < radix );

        // The input is untrusted and may carry arbitrarily many digits.
        // Accumulate only while the place value is small enough that
        // mult * digit and the running sum cannot overflow 32 bits; any
        // non-zero digit beyond that point makes the number too large.
        // Leading zeros of any count remain harmless.
        if ( digit != 0 ) {
            if ( mult > MAX_ENCODABLE ) {
                tooLarge = true;
            }
            else {
                ucs += mult * digit;
            }
        }
        if ( mult <= MAX_ENCODABLE ) {
            mult *= radix;
        }
    }

    if ( tooLarge ) {
        // Force the "cannot encode" path of ConvertUTF32ToUTF8().
        ucs = MAX_ENCODABLE + 1;
    }
    // convert the UCS to UTF-8
    ConvertUTF32ToUTF8( ucs, value, length );
    return p + delta + 1;
}
2012-02-21 17:08:12 +00:00
2012-09-16 18:32:34 +00:00
// Formats the int `v` with "%d" into `buffer`; `bufferSize` is passed to
// TIXML_SNPRINTF as the size of the buffer.
void XMLUtil::ToStr( int v, char* buffer, int bufferSize )
{
TIXML_SNPRINTF( buffer, bufferSize, "%d", v );
}
// Formats the unsigned `v` with "%u" into `buffer`; `bufferSize` is passed
// to TIXML_SNPRINTF as the size of the buffer.
void XMLUtil::ToStr( unsigned v, char* buffer, int bufferSize )
{
TIXML_SNPRINTF( buffer, bufferSize, "%u", v );
}
void XMLUtil::ToStr( bool v, char* buffer, int bufferSize )
{
2016-12-27 00:45:30 +00:00
TIXML_SNPRINTF( buffer, bufferSize, "%s", v ? writeBoolTrue : writeBoolFalse);
}
/*
ToStr() of a number is a very tricky topic.
https://github.com/leethomason/tinyxml2/issues/106
*/
// Formats the float `v` with "%.8g" (up to 8 significant digits) into
// `buffer`; `bufferSize` is passed to TIXML_SNPRINTF as the buffer size.
void XMLUtil::ToStr( float v, char* buffer, int bufferSize )
{
TIXML_SNPRINTF( buffer, bufferSize, "%.8g", v );
}
// Formats the double `v` with "%.17g" (up to 17 significant digits) into
// `buffer`; `bufferSize` is passed to TIXML_SNPRINTF as the buffer size.
void XMLUtil::ToStr( double v, char* buffer, int bufferSize )
{
TIXML_SNPRINTF( buffer, bufferSize, "%.17g", v );
}
2016-06-05 03:18:49 +00:00
void XMLUtil::ToStr(int64_t v, char* buffer, int bufferSize)
{
2016-07-18 05:49:40 +00:00
// horrible syntax trick to make the compiler happy about %lld
TIXML_SNPRINTF(buffer, bufferSize, "%lld", (long long)v);
2016-06-05 03:18:49 +00:00
}
// Parses `str` with "%d" into *value. Returns true when exactly one item
// was converted.
bool XMLUtil::ToInt( const char* str, int* value )
{
    const int converted = TIXML_SSCANF( str, "%d", value );
    return converted == 1;
}
// Parses `str` with "%u" into *value. Returns true when exactly one item
// was converted.
bool XMLUtil::ToUnsigned( const char* str, unsigned *value )
{
    const int converted = TIXML_SSCANF( str, "%u", value );
    return converted == 1;
}
// Interprets `str` as a boolean. Text that ToInt() accepts counts as false
// for 0 and true for anything else; otherwise the strings "true" and
// "false" are recognised via StringEqual(). Returns false, leaving *value
// untouched, when none of these apply.
bool XMLUtil::ToBool( const char* str, bool* value )
{
    int number = 0;
    if ( ToInt( str, &number ) ) {
        *value = ( number != 0 );
        return true;
    }

    const bool saysTrue = StringEqual( str, "true" );
    if ( saysTrue || StringEqual( str, "false" ) ) {
        *value = saysTrue;
        return true;
    }
    return false;
}
// Parses `str` with "%f" into *value. Returns true when exactly one item
// was converted.
bool XMLUtil::ToFloat( const char* str, float* value )
{
    const int converted = TIXML_SSCANF( str, "%f", value );
    return converted == 1;
}
2016-06-05 03:18:49 +00:00
// Parses `str` with "%lf" into *value. Returns true when exactly one item
// was converted.
bool XMLUtil::ToDouble( const char* str, double* value )
{
    const int converted = TIXML_SSCANF( str, "%lf", value );
    return converted == 1;
}
2016-06-05 03:18:49 +00:00
// Parses `str` with "%lld" and stores the result in *value. Returns true
// when exactly one item was converted; *value is left untouched otherwise.
bool XMLUtil::ToInt64(const char* str, int64_t* value)
{
    // Scan into a genuine long long so the "%lld" argument type matches,
    // then convert to int64_t.
    long long parsed = 0;
    if ( TIXML_SSCANF( str, "%lld", &parsed ) != 1 ) {
        return false;
    }
    *value = static_cast<int64_t>( parsed );
    return true;
}
2012-09-16 18:32:34 +00:00
char* XMLDocument::Identify( char* p, XMLNode** node )
2012-01-19 01:43:40 +00:00
{
TIXMLASSERT( node );
TIXMLASSERT( p );
char* const start = p;
int const startLine = _parseCurLineNum;
p = XMLUtil::SkipWhiteSpace( p, &_parseCurLineNum );
if( !*p ) {
*node = 0;
TIXMLASSERT( p );
return p;
}
// These strings define the matching patterns:
static const char* xmlHeader = { "<?" };
static const char* commentHeader = { "<!--" };
static const char* cdataHeader = { "<![CDATA[" };
static const char* dtdHeader = { "<!" };
static const char* elementHeader = { "<" }; // and a header for everything else; check last.
static const int xmlHeaderLen = 2;
static const int commentHeaderLen = 4;
static const int cdataHeaderLen = 9;
static const int dtdHeaderLen = 2;
static const int elementHeaderLen = 1;
2012-01-19 01:43:40 +00:00
TIXMLASSERT( sizeof( XMLComment ) == sizeof( XMLUnknown ) ); // use same memory pool
TIXMLASSERT( sizeof( XMLComment ) == sizeof( XMLDeclaration ) ); // use same memory pool
XMLNode* returnNode = 0;
if ( XMLUtil::StringEqual( p, xmlHeader, xmlHeaderLen ) ) {
2017-02-27 12:53:40 +00:00
returnNode = CreateUnlinkedNode<XMLDeclaration>( _commentPool );
returnNode->_parseLineNum = _parseCurLineNum;
p += xmlHeaderLen;
}
else if ( XMLUtil::StringEqual( p, commentHeader, commentHeaderLen ) ) {
2017-02-27 12:53:40 +00:00
returnNode = CreateUnlinkedNode<XMLComment>( _commentPool );
returnNode->_parseLineNum = _parseCurLineNum;
p += commentHeaderLen;
}
else if ( XMLUtil::StringEqual( p, cdataHeader, cdataHeaderLen ) ) {
2017-02-27 12:53:40 +00:00
XMLText* text = CreateUnlinkedNode<XMLText>( _textPool );
returnNode = text;
returnNode->_parseLineNum = _parseCurLineNum;
p += cdataHeaderLen;
text->SetCData( true );
}
else if ( XMLUtil::StringEqual( p, dtdHeader, dtdHeaderLen ) ) {
2017-02-27 12:53:40 +00:00
returnNode = CreateUnlinkedNode<XMLUnknown>( _commentPool );
returnNode->_parseLineNum = _parseCurLineNum;
p += dtdHeaderLen;
}
else if ( XMLUtil::StringEqual( p, elementHeader, elementHeaderLen ) ) {
2017-02-27 12:53:40 +00:00
returnNode = CreateUnlinkedNode<XMLElement>( _elementPool );
returnNode->_parseLineNum = _parseCurLineNum;
p += elementHeaderLen;
}
else {