PixelLightAPI
.
|
00001 /*********************************************************\ 00002 * File: XmlBase.h * 00003 * 00004 * Copyright (C) 2002-2012 The PixelLight Team (http://www.pixellight.org/) 00005 * 00006 * This file is part of PixelLight. 00007 * 00008 * PixelLight is free software: you can redistribute it and/or modify 00009 * it under the terms of the GNU Lesser General Public License as published by 00010 * the Free Software Foundation, either version 3 of the License, or 00011 * (at your option) any later version. 00012 * 00013 * PixelLight is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 * GNU Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public License 00019 * along with PixelLight. If not, see <http://www.gnu.org/licenses/>. 00020 \*********************************************************/ 00021 00022 00023 #ifndef __PLCORE_XML_BASE_H__ 00024 #define __PLCORE_XML_BASE_H__ 00025 #pragma once 00026 00027 00028 //[-------------------------------------------------------] 00029 //[ Includes ] 00030 //[-------------------------------------------------------] 00031 #include "PLCore/String/String.h" 00032 00033 00034 //[-------------------------------------------------------] 00035 //[ Namespace ] 00036 //[-------------------------------------------------------] 00037 namespace PLCore { 00038 00039 00040 //[-------------------------------------------------------] 00041 //[ Forward declarations ] 00042 //[-------------------------------------------------------] 00043 class File; 00044 class XmlParsingData; 00045 00046 00047 //[-------------------------------------------------------] 00048 //[ Classes ] 00049 //[-------------------------------------------------------] 00050 /** 00051 * @brief 00052 * Abstract XML base class 00053 * 00054 * @remarks 00055 * 'XmlBase' is a base class for every class of the XML parser. 00056 * It does little except to establish that parser classes can be printed and provide 00057 * some utility functions. In XML, the document and elements can contain other 00058 * elements and other types of nodes. 00059 * 00060 * @verbatim 00061 * A document can contain: Element (container or leaf) 00062 * Comment (leaf) 00063 * Unknown (leaf) 00064 * Declaration (leaf) 00065 * 00066 * An element can contain: Element (container or leaf) 00067 * Text (leaf) 00068 * Attributes (not on tree) 00069 * Comment (leaf) 00070 * Unknown (leaf) 00071 * 00072 * A declaration contains: Attributes (not on tree) 00073 * @endverbatim 00074 */ 00075 class XmlBase { 00076 00077 00078 //[-------------------------------------------------------] 00079 //[ Friends ] 00080 //[-------------------------------------------------------] 00081 friend class XmlParsingData; 00082 00083 00084 //[-------------------------------------------------------] 00085 //[ Public definitions ] 00086 //[-------------------------------------------------------] 00087 public: 00088 /** 00089 * @brief 00090 * Error codes 00091 */ 00092 enum { 00093 NoError, /**< No error */ 00094 Error, /**< Error */ 00095 ErrorOpeningFile, /**< Error opening file */ 00096 ErrorParsingElement, /**< Error parsing element */ 00097 ErrorFailedToReadElementName, /**< Error failed to read element name */ 00098 ErrorReadingElementValue, /**< Error reading element value */ 00099 ErrorReadingAttributes, /**< Error reading attributes */ 00100 ErrorParsingEmpty, /**< Error parsing empty */ 00101 ErrorReadingEndTag, /**< Error reading end tag */ 00102 ErrorParsingUnknown, /**< Error parsing unknown */ 00103 ErrorParsingComment, /**< Error parsing comment */ 00104 ErrorParsingDeclaration, /**< Error parsing declaration */ 00105 ErrorDocumentEmpty, /**< Error document empty */ 00106 ErrorEmbeddedNull, /**< Error embedded null */ 00107 ErrorParsingCData, /**< Error parsing CDATA */ 00108 ErrorDocumentTopOnly, /**< Error document top only */ 00109 ErrorStringCount /**< Error string count */ 00110 }; 00111 00112 /** 00113 * @brief 00114 * Only used by 'Attribute::Query'-functions 00115 */ 00116 enum EQueryResult { 00117 Success, /**< All went fine */ 00118 NoAttribute, /**< Attribute does not exist */ 00119 WrongType /**< Invalid attribute type */ 00120 }; 00121 00122 /** 00123 * @brief 00124 * Used by the parsing routines 00125 */ 00126 enum EEncoding { 00127 EncodingUnknown, /**< Unknown encoding (default) */ 00128 EncodingUTF8, /**< UTF8 encoding */ 00129 EncodingLegacy /**< Legacy encoding */ 00130 }; 00131 00132 00133 //[-------------------------------------------------------] 00134 //[ Public static functions ] 00135 //[-------------------------------------------------------] 00136 public: 00137 /** 00138 * @brief 00139 * Return the current white space setting 00140 * 00141 * @return 00142 * 'true' if white space condensed is set, else 'false' 00143 * 00144 * @remarks 00145 * The world does not agree on whether white space should be kept or 00146 * not. In order to make everyone happy, these global, static functions 00147 * are provided to set whether or not the parser will condense all white space 00148 * into a single space or not. The default is to condense. Note changing this 00149 * values is not thread safe. 00150 */ 00151 static inline bool IsWhiteSpaceCondensed(); 00152 00153 /** 00154 * @brief 00155 * Set white space handling 00156 * 00157 * @param[in] bCondense 00158 * 'true' if white space condensed is set, else 'false' 00159 * 00160 * @see 00161 * - IsWhiteSpaceCondensed() 00162 */ 00163 static inline void SetCondenseWhiteSpace(bool bCondense); 00164 00165 00166 //[-------------------------------------------------------] 00167 //[ Public functions ] 00168 //[-------------------------------------------------------] 00169 public: 00170 /** 00171 * @brief 00172 * Destructor 00173 */ 00174 PLCORE_API virtual ~XmlBase(); 00175 00176 /** 00177 * @brief 00178 * Return the row position, in the original source file, of this node or attribute 00179 * 00180 * @return 00181 * Row position 00182 * 00183 * @remarks 00184 * The row and column are 1-based. (That is the first row and first column is 00185 * 1, 1). If the returns values are 0 or less, then the parser does not have 00186 * a row and column value. 00187 * Generally, the row and column value will be set when the 'XmlDocument::Load()', 00188 * 'XmlDocument::Load()', or any 'XmlNode::Parse()' is called. It will NOT be set 00189 * when the DOM was created from operator '>>'. 00190 * The values reflect the initial load. Once the DOM is modified programmatically 00191 * (by adding or changing nodes and attributes) the new values will NOT update to 00192 * reflect changes in the document. 00193 * There is a minor performance cost to computing the row and column. Computation 00194 * can be disabled if 'XmlDocument::SetTabSize()' is called with 0 as the value. 00195 * 00196 * @see 00197 * - XmlDocument::SetTabSize() 00198 */ 00199 inline int GetRow() const; 00200 00201 /** 00202 * @brief 00203 * Return the column position, in the original source file, of this node or attribute 00204 * 00205 * @return 00206 * Column position 00207 * 00208 * @see 00209 * - GetRow() 00210 */ 00211 inline int GetColumn() const; 00212 00213 /** 00214 * @brief 00215 * Returns the user data 00216 * 00217 * @return 00218 * User data, can be a null pointer 00219 * 00220 * @note 00221 * - The user data is not used internally, it's really user only :) 00222 */ 00223 inline void *GetUserData() const; 00224 00225 /** 00226 * @brief 00227 * Sets the user data 00228 * 00229 * @param[in] pUser 00230 * User data, can be a null pointer 00231 * 00232 * @see 00233 * - GetUserData() 00234 */ 00235 inline void SetUserData(void *pUser); 00236 00237 00238 //[-------------------------------------------------------] 00239 //[ Public virtual XmlBase functions ] 00240 //[-------------------------------------------------------] 00241 public: 00242 /** 00243 * @brief 00244 * Save function 00245 * 00246 * @param[out] cFile 00247 * File to write in, must be opened and writable 00248 * @param[in] nDepth 00249 * Current depth 00250 * 00251 * @return 00252 * 'true' if all went fine, else 'false' 00253 */ 00254 virtual bool Save(File &cFile, uint32 nDepth = 0) = 0; 00255 00256 /** 00257 * @brief 00258 * Output as string function 00259 * 00260 * @param[in] nDepth 00261 * Current depth 00262 * 00263 * @return 00264 * String containing the XML 00265 */ 00266 virtual String ToString(uint32 nDepth = 0) const = 0; 00267 00268 /** 00269 * @brief 00270 * Parse the given null terminated block of XML data 00271 * 00272 * @param[in] pszData 00273 * Parsing data, if a null pointer, an error will be returned 00274 * @param[in] pData 00275 * Parsing data, can be a null pointer 00276 * @param[in] nEncoding 00277 * Encoding 00278 * 00279 * @return 00280 * The pointer to the parameter 'pszData' if all went fine, else a null pointer 00281 * 00282 * @remarks 00283 * Passing in an encoding to this method (either 'EncodingLegacy' or 00284 * 'EncodingUTF8' will force the parser to use that encoding, regardless 00285 * of what the parser might otherwise try to detect. 00286 */ 00287 virtual const char *Parse(const char *pszData, XmlParsingData *pData = nullptr, EEncoding nEncoding = EncodingUnknown) = 0; 00288 00289 00290 //[-------------------------------------------------------] 00291 //[ Protected definitions ] 00292 //[-------------------------------------------------------] 00293 protected: 00294 /** 00295 * @brief 00296 * Internal structure for tracking location of items in the XML file 00297 */ 00298 struct Cursor { 00299 Cursor() { nRow = nColumn = -1; } 00300 void Clear() { nRow = nColumn = -1; } 00301 00302 int nRow; // 0 based 00303 int nColumn; // 0 based 00304 }; 00305 00306 // Bunch of unicode info at: 00307 // http://www.unicode.org/faq/utf_bom.html 00308 // Including the basic of this table, which determines the #bytes in the 00309 // sequence from the lead byte. 1 placed for invalid sequences -- 00310 // although the result will be junk, pass it through as much as possible. 00311 // Beware of the non-characters in UTF-8: 00312 // ef bb bf (Microsoft "lead bytes") 00313 // ef bf be 00314 // ef bf bf 00315 static const unsigned char UTF_LEAD_0 = 0xefU; 00316 static const unsigned char UTF_LEAD_1 = 0xbbU; 00317 static const unsigned char UTF_LEAD_2 = 0xbfU; 00318 00319 00320 //[-------------------------------------------------------] 00321 //[ Protected static functions ] 00322 //[-------------------------------------------------------] 00323 protected: 00324 static const char *SkipWhiteSpace(const char *pszData, EEncoding nEncoding); 00325 static bool IsWhiteSpace(char c); 00326 static bool IsWhiteSpace(int c); 00327 00328 /** 00329 * @brief 00330 * Reads an XML name into the string provided 00331 * 00332 * @param[in] pszData 00333 * Data 00334 * @param[out] sName 00335 * Read name 00336 * @param[in] nEncoding 00337 * Encoding 00338 * 00339 * @return 00340 * Returns a pointer just past the last character of the name, or 0 if the function has an error 00341 */ 00342 static const char *ReadName(const char *pszData, String &sName, EEncoding nEncoding); 00343 00344 /** 00345 * @brief 00346 * Reads text 00347 * 00348 * @param[in] pszData 00349 * Where to start 00350 * @param[out] sText 00351 * The string read 00352 * @param[in] bTrimWhiteSpace 00353 * Whether to keep the white space 00354 * @param[in] pszEndTag 00355 * What ends this text 00356 * @param[in] bCaseInsensitive 00357 * Whether to ignore case in the end tag 00358 * @param[in] nEncoding 00359 * The current encoding 00360 * 00361 * @return 00362 * Returns a pointer past the given end tag 00363 * 00364 * @note 00365 * - Wickedly complex options, but it keeps the (sensitive) code in one place. 00366 */ 00367 static const char *ReadText(const char *pszData, String &sText, bool bTrimWhiteSpace, const char *pszEndTag, bool bCaseInsensitive, EEncoding nEncoding); 00368 00369 // If an entity has been found, transform it into a character. 00370 static const char *GetEntity(const char *pszData, char *pszValue, int &nLength, EEncoding nEncoding); 00371 00372 // Get a character, while interpreting entities. 00373 // The length can be from 0 to 4 bytes. 00374 static const char *GetChar(const char *pszData, char *pszValue, int &nLength, EEncoding nEncoding); 00375 00376 // Return true if the next characters in the stream are any of the endTag sequences. 00377 // Ignore case only works for english, and should only be relied on when comparing 00378 // to English words: StringEqual(pszData, "version", true) is fine. 00379 static bool StringEqual(const char *pszData, const char *pszTag, bool bIgnoreCase, EEncoding nEncoding); 00380 00381 // None of these methods are reliable for any language except English. 00382 // Good for approximation, not great for accuracy. 00383 static int IsAlpha(unsigned char nByte); 00384 static int IsAlphaNum(unsigned char nByte); 00385 static int ToLower(int nValue, EEncoding nEncoding); 00386 static void ConvertUTF32ToUTF8(unsigned long nInput, char *pszOutput, int &nLength); 00387 00388 // Expands entities in a string. Note this should not contain the tag's '<', '>', etc, 00389 // or they will be transformed into entities! 00390 static void EncodeString(const String &sInString, String &sOutString); 00391 00392 00393 //[-------------------------------------------------------] 00394 //[ Protected functions ] 00395 //[-------------------------------------------------------] 00396 protected: 00397 /** 00398 * @brief 00399 * Default constructor 00400 */ 00401 XmlBase(); 00402 00403 00404 //[-------------------------------------------------------] 00405 //[ Protected data ] 00406 //[-------------------------------------------------------] 00407 protected: 00408 void *m_pUserData; /**< User data, can be a null pointer */ 00409 Cursor m_cCursor; /**< Cursor */ 00410 00411 00412 //[-------------------------------------------------------] 00413 //[ Private functions ] 00414 //[-------------------------------------------------------] 00415 private: 00416 /** 00417 * @brief 00418 * Copy constructor 00419 * 00420 * @param[in] cSource 00421 * Source to copy from 00422 */ 00423 XmlBase(const XmlBase &cSource); 00424 00425 /** 00426 * @brief 00427 * Copy operator 00428 * 00429 * @param[in] cSource 00430 * Source to copy from 00431 * 00432 * @return 00433 * Reference to this instance 00434 */ 00435 XmlBase &operator =(const XmlBase &cSource); 00436 00437 00438 //[-------------------------------------------------------] 00439 //[ Private static data ] 00440 //[-------------------------------------------------------] 00441 private: 00442 static const int utf8ByteTable[256]; /**< Table that returns, for a given lead byte, the total number of bytes in the UTF-8 sequence */ 00443 struct Entity { 00444 String sString; 00445 char nCharacter; 00446 }; 00447 enum { 00448 NumOfEntities = 5 00449 }; 00450 static Entity sEntity[NumOfEntities]; 00451 static PLCORE_API bool bCondenseWhiteSpace; 00452 00453 00454 }; 00455 00456 00457 //[-------------------------------------------------------] 00458 //[ Namespace ] 00459 //[-------------------------------------------------------] 00460 } // PLCore 00461 00462 00463 //[-------------------------------------------------------] 00464 //[ Implementation ] 00465 //[-------------------------------------------------------] 00466 #include "PLCore/Xml/XmlBase.inl" 00467 00468 00469 #endif // __PLCORE_XML_BASE_H__
|