logo

libbulletml

Library of Bullet Markup Language (forked from https://shinh.skr.jp/libbulletml/index_en.html )git clone https://anongit.hacktivis.me/git/libbulletml.git/

tinyxmlparser.cpp (12945B)


  1. /*
  2. Copyright (c) 2000 Lee Thomason (www.grinninglizard.com)
  3. This software is provided 'as-is', without any express or implied
  4. warranty. In no event will the authors be held liable for any
  5. damages arising from the use of this software.
  6. Permission is granted to anyone to use this software for any
  7. purpose, including commercial applications, and to alter it and
  8. redistribute it freely, subject to the following restrictions:
  9. 1. The origin of this software must not be misrepresented; you must
  10. not claim that you wrote the original software. If you use this
  11. software in a product, an acknowledgment in the product documentation
  12. would be appreciated but is not required.
  13. 2. Altered source versions must be plainly marked as such, and
  14. must not be misrepresented as being the original software.
  15. 3. This notice may not be removed or altered from any source
  16. distribution.
  17. */
  18. #include "tinyxml.h"
  19. #include <ctype.h>
  20. #include <cstring>
  21. const char* TiXmlBase::SkipWhiteSpace( const char* p )
  22. {
  23. while ( p && *p &&
  24. ( isspace( *p ) || *p == '\n' || *p == '\r' ) )
  25. p++;
  26. return p;
  27. }
  28. const char* TiXmlBase::ReadName( const char* p, std::string* name )
  29. {
  30. *name = "";
  31. const char* start = p;
  32. // Names start with letters or underscores.
  33. // After that, they can be letters, underscores, numbers,
  34. // hyphens, or colons. (Colons are valid ony for namespaces,
  35. // but tinyxml can't tell namespaces from names.)
  36. if ( p && ( isalpha( *p ) || *p == '_' ) )
  37. {
  38. p++;
  39. while( p && *p &&
  40. ( isalnum( *p )
  41. || *p == '_'
  42. || *p == '-'
  43. || *p == ':' ) )
  44. {
  45. p++;
  46. }
  47. name->append( start, p - start );
  48. return p;
  49. }
  50. return 0;
  51. }
  52. const char* TiXmlBase::ReadText( const char* p,
  53. std::string* text,
  54. bool trimWhiteSpace,
  55. const char* endTag,
  56. bool caseInsensitive )
  57. {
  58. *text = "";
  59. if ( !trimWhiteSpace // certain tags always keep whitespace
  60. /*|| !condenseWhiteSpace*/ ) // if true, whitespace is always kept
  61. {
  62. // Keep all the white space.
  63. while ( p && *p
  64. && strncmp( p, endTag, strlen(endTag) ) != 0
  65. )
  66. {
  67. char c = *(p++);
  68. (* text) += c;
  69. }
  70. }
  71. else
  72. {
  73. bool whitespace = false;
  74. // Remove leading white space:
  75. p = SkipWhiteSpace( p );
  76. while ( p && *p
  77. && strncmp( p, endTag, strlen(endTag) ) != 0 )
  78. {
  79. if ( *p == '\r' || *p == '\n' )
  80. {
  81. whitespace = true;
  82. ++p;
  83. }
  84. else if ( isspace( *p ) )
  85. {
  86. whitespace = true;
  87. ++p;
  88. }
  89. else
  90. {
  91. // If we've found whitespace, add it before the
  92. // new character. Any whitespace just becomes a space.
  93. if ( whitespace )
  94. {
  95. (* text) += ' ';
  96. whitespace = false;
  97. }
  98. char c = *(p++);
  99. (* text) += c;
  100. }
  101. }
  102. }
  103. return p + strlen( endTag );
  104. }
  105. const char* TiXmlDocument::Parse( const char* start )
  106. {
  107. // Parse away, at the document level. Since a document
  108. // contains nothing but other tags, most of what happens
  109. // here is skipping white space.
  110. const char* p = start;
  111. p = SkipWhiteSpace( p );
  112. if ( !p || !*p )
  113. {
  114. error = true;
  115. errorDesc = "Document empty.";
  116. }
  117. while ( p && *p )
  118. {
  119. if ( *p != '<' )
  120. {
  121. error = true;
  122. errorDesc = "The '<' symbol that starts a tag was not found.";
  123. break;
  124. }
  125. else
  126. {
  127. TiXmlNode* node = IdentifyAndParse( &p );
  128. if ( node )
  129. {
  130. LinkEndChild( node );
  131. }
  132. }
  133. p = SkipWhiteSpace( p );
  134. }
  135. return 0; // Return null is fine for a document: once it is read, the parsing is over.
  136. }
  137. TiXmlNode* TiXmlNode::IdentifyAndParse( const char** where )
  138. {
  139. const char* p = *where;
  140. TiXmlNode* returnNode = 0;
  141. assert( *p == '<' );
  142. TiXmlDocument* doc = GetDocument();
  143. p = SkipWhiteSpace( p+1 );
  144. // What is this thing?
  145. // - Elements start with a letter or underscore, but xml is reserved.
  146. // - Comments: <!--
  147. // - Everthing else is unknown to tinyxml.
  148. //
  149. if ( tolower( *(p+0) ) == '?'
  150. && tolower( *(p+1) ) == 'x'
  151. && tolower( *(p+2) ) == 'm'
  152. && tolower( *(p+3) ) == 'l' )
  153. {
  154. #ifdef DEBUG_PARSER
  155. printf( "XML parsing Declaration\n" );
  156. #endif
  157. returnNode = new TiXmlDeclaration();
  158. }
  159. else if ( isalpha( *p ) || *p == '_' )
  160. {
  161. #ifdef DEBUG_PARSER
  162. printf( "XML parsing Element\n" );
  163. #endif
  164. returnNode = new TiXmlElement( "" );
  165. }
  166. else if ( *(p+0) == '!'
  167. && *(p+1) == '-'
  168. && *(p+2) == '-' )
  169. {
  170. #ifdef DEBUG_PARSER
  171. printf( "XML parsing Comment\n" );
  172. #endif
  173. returnNode = new TiXmlComment();
  174. }
  175. else if ( strncmp(p, "![CDATA[", 8) == 0 )
  176. {
  177. TiXmlNode* cdataNode = new TiXmlCData();
  178. if ( !cdataNode )
  179. {
  180. if ( doc ) doc->SetError( TIXML_ERROR_OUT_OF_MEMORY );
  181. return 0;
  182. }
  183. returnNode = cdataNode;
  184. }
  185. else
  186. {
  187. #ifdef DEBUG_PARSER
  188. printf( "XML parsing Comment\n" );
  189. #endif
  190. returnNode = new TiXmlUnknown();
  191. }
  192. if ( returnNode )
  193. {
  194. // Set the parent, so it can report errors
  195. returnNode->parent = this;
  196. p = returnNode->Parse( p );
  197. }
  198. else
  199. {
  200. if ( doc )
  201. doc->SetError( TIXML_ERROR_OUT_OF_MEMORY );
  202. p = 0;
  203. }
  204. *where = p;
  205. return returnNode;
  206. }
  207. const char* TiXmlElement::Parse( const char* p )
  208. {
  209. TiXmlDocument* document = GetDocument();
  210. p = SkipWhiteSpace( p );
  211. if ( !p || !*p )
  212. {
  213. if ( document ) document->SetError( TIXML_ERROR_PARSING_ELEMENT );
  214. return 0;
  215. }
  216. // Read the name.
  217. p = ReadName( p, &value );
  218. if ( !p )
  219. {
  220. if ( document ) document->SetError( TIXML_ERROR_FAILED_TO_READ_ELEMENT_NAME );
  221. return 0;
  222. }
  223. std::string endTag = "</";
  224. endTag += value;
  225. endTag += ">";
  226. // Check for and read attributes. Also look for an empty
  227. // tag or an end tag.
  228. while ( p && *p )
  229. {
  230. p = SkipWhiteSpace( p );
  231. if ( !p || !*p )
  232. {
  233. if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
  234. return 0;
  235. }
  236. if ( *p == '/' )
  237. {
  238. // Empty tag.
  239. if ( *(p+1) != '>' )
  240. {
  241. if ( document ) document->SetError( TIXML_ERROR_PARSING_EMPTY );
  242. return 0;
  243. }
  244. return p+2;
  245. }
  246. else if ( *p == '>' )
  247. {
  248. // Done with attributes (if there were any.)
  249. // Read the value -- which can include other
  250. // elements -- read the end tag, and return.
  251. p = ReadValue( p+1 ); // Note this is an Element method, and will set the error if one happens.
  252. if ( !p )
  253. return 0;
  254. // We should find the end tag now
  255. std::string buf( p, endTag.size() );
  256. if ( endTag == buf )
  257. {
  258. return p+endTag.size();
  259. }
  260. else
  261. {
  262. if ( document ) document->SetError( TIXML_ERROR_READING_END_TAG );
  263. return 0;
  264. }
  265. }
  266. else
  267. {
  268. // Try to read an element:
  269. TiXmlAttribute attrib;
  270. attrib.SetDocument( document );
  271. p = attrib.Parse( p );
  272. if ( p )
  273. {
  274. SetAttribute( attrib.Name(), attrib.Value() );
  275. }
  276. }
  277. }
  278. return 0;
  279. }
  280. const char* TiXmlElement::ReadValue( const char* p )
  281. {
  282. TiXmlDocument* document = GetDocument();
  283. // Read in text and elements in any order.
  284. p = SkipWhiteSpace( p );
  285. while ( p && *p )
  286. {
  287. const char* start = p;
  288. while ( *p && *p != '<' )
  289. p++;
  290. if ( !*p )
  291. {
  292. if ( document ) document->SetError( TIXML_ERROR_READING_ELEMENT_VALUE );
  293. return 0;
  294. }
  295. if ( p != start )
  296. {
  297. // Take what we have, make a text element.
  298. TiXmlText* text = new TiXmlText();
  299. if ( !text )
  300. {
  301. if ( document ) document->SetError( TIXML_ERROR_OUT_OF_MEMORY );
  302. return 0;
  303. }
  304. text->Parse( start );
  305. if ( !text->Blank() )
  306. LinkEndChild( text );
  307. else
  308. delete text;
  309. }
  310. else
  311. {
  312. // We hit a '<'
  313. // Have we hit a new element or an end tag?
  314. if ( *(p+1) == '/' )
  315. {
  316. return p; // end tag
  317. }
  318. else
  319. {
  320. // TiXmlElement* element = new TiXmlElement( "" );
  321. //
  322. // if ( element )
  323. // {
  324. // p = element->Parse( p+1 );
  325. // if ( p )
  326. // LinkEndChild( element );
  327. // }
  328. // else
  329. // {
  330. // if ( document ) document->SetError( ERROR_OUT_OF_MEMORY );
  331. // return 0;
  332. // }
  333. TiXmlNode* node = IdentifyAndParse( &p );
  334. if ( node )
  335. {
  336. LinkEndChild( node );
  337. }
  338. else
  339. {
  340. return 0;
  341. }
  342. }
  343. }
  344. }
  345. return 0;
  346. }
  347. const char* TiXmlUnknown::Parse( const char* p )
  348. {
  349. const char* end = strchr( p, '>' );
  350. if ( !end )
  351. {
  352. TiXmlDocument* document = GetDocument();
  353. if ( document )
  354. document->SetError( TIXML_ERROR_PARSING_UNKNOWN );
  355. return 0;
  356. }
  357. else
  358. {
  359. value = std::string( p, end-p );
  360. // value.resize( end - p );
  361. return end + 1; // return just past the '>'
  362. }
  363. }
  364. const char* TiXmlComment::Parse( const char* p )
  365. {
  366. assert( *p == '!' && *(p+1) == '-' && *(p+2) == '-' );
  367. // Find the end, copy the parts between to the value of
  368. // this object, and return.
  369. const char* start = p+3;
  370. const char* end = strstr( p, "-->" );
  371. if ( !end )
  372. {
  373. TiXmlDocument* document = GetDocument();
  374. if ( document )
  375. document->SetError( TIXML_ERROR_PARSING_COMMENT );
  376. return 0;
  377. }
  378. else
  379. {
  380. // Assemble the comment, removing the white space.
  381. bool whiteSpace = false;
  382. const char* q;
  383. for( q=start; q<end; q++ )
  384. {
  385. if ( isspace( *q ) )
  386. {
  387. if ( !whiteSpace )
  388. {
  389. value += ' ';
  390. whiteSpace = true;
  391. }
  392. }
  393. else
  394. {
  395. value += *q;
  396. whiteSpace = false;
  397. }
  398. }
  399. // value = std::string( start, end-start );
  400. return end + 3; // return just past the '>'
  401. }
  402. }
  403. const char* TiXmlAttribute::Parse( const char* p )
  404. {
  405. // Read the name, the '=' and the value.
  406. p = ReadName( p, &name );
  407. if ( !p )
  408. {
  409. if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
  410. return 0;
  411. }
  412. p = SkipWhiteSpace( p );
  413. if ( !p || *p != '=' )
  414. {
  415. if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
  416. return 0;
  417. }
  418. p = SkipWhiteSpace( p+1 );
  419. if ( !p || !*p )
  420. {
  421. if ( document ) document->SetError( TIXML_ERROR_READING_ATTRIBUTES );
  422. return 0;
  423. }
  424. const char* end = 0;
  425. const char* start = p+1;
  426. const char* past = 0;
  427. if ( *p == '\'' )
  428. {
  429. end = strchr( start, '\'' );
  430. past = end+1;
  431. }
  432. else if ( *p == '"' )
  433. {
  434. end = strchr( start, '"' );
  435. past = end+1;
  436. }
  437. else
  438. {
  439. // All attribute values should be in single or double quotes.
  440. // But this is such a common error that the parser will try
  441. // its best, even without them.
  442. start--;
  443. for ( end = start; *end; end++ )
  444. {
  445. if ( isspace( *end ) || *end == '/' || *end == '>' )
  446. break;
  447. }
  448. past = end;
  449. }
  450. value = std::string( start, end-start );
  451. return past;
  452. }
  453. const char* TiXmlText::Parse( const char* p )
  454. {
  455. value = "";
  456. bool ignoreWhite = true;
  457. const char* end = "<";
  458. p = ReadText( p, &value, ignoreWhite, end, false );
  459. if ( p )
  460. return p-1; // don't truncate the '<'
  461. return 0;
  462. #if 0
  463. // Remove leading white space:
  464. p = SkipWhiteSpace( p );
  465. while ( *p && *p != '<' )
  466. {
  467. if ( *p == '\r' || *p == '\n' )
  468. {
  469. whitespace = true;
  470. }
  471. else if ( isspace( *p ) )
  472. {
  473. whitespace = true;
  474. }
  475. else
  476. {
  477. // If we've found whitespace, add it before the
  478. // new character. Any whitespace just becomes a space.
  479. if ( whitespace )
  480. {
  481. value += ' ';
  482. whitespace = false;
  483. }
  484. value += *p;
  485. }
  486. p++;
  487. }
  488. // Keep white space before the '<'
  489. if ( whitespace )
  490. value += ' ';
  491. return p;
  492. #endif
  493. }
  494. const char* TiXmlCData::Parse( const char* p )
  495. {
  496. value = "";
  497. bool ignoreWhite = false;
  498. p += 8;
  499. const char* end = "]]>";
  500. p = ReadText( p, &value, ignoreWhite, end, false );
  501. if ( p )
  502. return p;
  503. return 0;
  504. }
  505. const char* TiXmlDeclaration::Parse( const char* p )
  506. {
  507. // Find the beginning, find the end, and look for
  508. // the stuff in-between.
  509. const char* start = p+4;
  510. const char* end = strstr( start, "?>" );
  511. // Be nice to the user:
  512. if ( !end )
  513. {
  514. end = strstr( start, ">" );
  515. end++;
  516. }
  517. else
  518. {
  519. end += 2;
  520. }
  521. if ( !end )
  522. {
  523. TiXmlDocument* document = GetDocument();
  524. if ( document )
  525. document->SetError( TIXML_ERROR_PARSING_DECLARATION );
  526. return 0;
  527. }
  528. else
  529. {
  530. const char* p;
  531. p = strstr( start, "version" );
  532. if ( p && p < end )
  533. {
  534. TiXmlAttribute attrib;
  535. attrib.Parse( p );
  536. version = attrib.Value();
  537. }
  538. p = strstr( start, "encoding" );
  539. if ( p && p < end )
  540. {
  541. TiXmlAttribute attrib;
  542. attrib.Parse( p );
  543. encoding = attrib.Value();
  544. }
  545. p = strstr( start, "standalone" );
  546. if ( p && p < end )
  547. {
  548. TiXmlAttribute attrib;
  549. attrib.Parse( p );
  550. standalone = attrib.Value();
  551. }
  552. }
  553. return end;
  554. }
  555. bool TiXmlText::Blank()
  556. {
  557. for ( unsigned i=0; i<value.size(); i++ )
  558. if ( !isspace( value[i] ) )
  559. return false;
  560. return true;
  561. }