PixelLightAPI: XmlBase.h Source File

Go to the documentation of this file.
00001 /*********************************************************\
00002  *  File: XmlBase.h                                      *
00003  *
00004  *  Copyright (C) 2002-2012 The PixelLight Team (http://www.pixellight.org/)
00005  *
00006  *  This file is part of PixelLight.
00007  *
00008  *  PixelLight is free software: you can redistribute it and/or modify
00009  *  it under the terms of the GNU Lesser General Public License as published by
00010  *  the Free Software Foundation, either version 3 of the License, or
00011  *  (at your option) any later version.
00012  *
00013  *  PixelLight is distributed in the hope that it will be useful,
00014  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  *  GNU Lesser General Public License for more details.
00017  *
00018  *  You should have received a copy of the GNU Lesser General Public License
00019  *  along with PixelLight. If not, see <http://www.gnu.org/licenses/>.
00020 \*********************************************************/
00021 
00022 
00023 #ifndef __PLCORE_XML_BASE_H__
00024 #define __PLCORE_XML_BASE_H__
00025 #pragma once
00026 
00027 
00028 //[-------------------------------------------------------]
00029 //[ Includes                                              ]
00030 //[-------------------------------------------------------]
00031 #include "PLCore/String/String.h"
00032 
00033 
00034 //[-------------------------------------------------------]
00035 //[ Namespace                                             ]
00036 //[-------------------------------------------------------]
00037 namespace PLCore {
00038 
00039 
00040 //[-------------------------------------------------------]
00041 //[ Forward declarations                                  ]
00042 //[-------------------------------------------------------]
00043 class File;
00044 class XmlParsingData;
00045 
00046 
00047 //[-------------------------------------------------------]
00048 //[ Classes                                               ]
00049 //[-------------------------------------------------------]
00050 /**
00051 *  @brief
00052 *    Abstract XML base class
00053 *
00054 *  @remarks
00055 *    'XmlBase' is a base class for every class of the XML parser.
00056 *    It does little except to establish that parser classes can be printed and provide
00057 *    some utility functions. In XML, the document and elements can contain other
00058 *    elements and other types of nodes.
00059 *
00060 *    @verbatim
00061 *    A document can contain: Element (container or leaf)
00062 *                            Comment (leaf)
00063 *                            Unknown (leaf)
00064 *                            Declaration (leaf)
00065 *
00066 *    An element can contain: Element (container or leaf)
00067 *                            Text (leaf)
00068 *                            Attributes (not on tree)
00069 *                            Comment (leaf)
00070 *                            Unknown (leaf)
00071 *
00072 *    A declaration contains: Attributes (not on tree)
00073 *    @endverbatim
00074 */
00075 class XmlBase {
00076 
00077 
00078     //[-------------------------------------------------------]
00079     //[ Friends                                               ]
00080     //[-------------------------------------------------------]
00081     friend class XmlParsingData;
00082 
00083 
00084     //[-------------------------------------------------------]
00085     //[ Public definitions                                    ]
00086     //[-------------------------------------------------------]
00087     public:
00088         /**
00089         *  @brief
00090         *    Error codes
00091         */
00092         enum {
00093             NoError,                        /**< No error */
00094             Error,                          /**< Error */
00095             ErrorOpeningFile,               /**< Error opening file */
00096             ErrorParsingElement,            /**< Error parsing element */
00097             ErrorFailedToReadElementName,   /**< Error failed to read element name */
00098             ErrorReadingElementValue,       /**< Error reading element value */
00099             ErrorReadingAttributes,         /**< Error reading attributes */
00100             ErrorParsingEmpty,              /**< Error parsing empty */
00101             ErrorReadingEndTag,             /**< Error reading end tag */
00102             ErrorParsingUnknown,            /**< Error parsing unknown */
00103             ErrorParsingComment,            /**< Error parsing comment */
00104             ErrorParsingDeclaration,        /**< Error parsing declaration */
00105             ErrorDocumentEmpty,             /**< Error document empty */
00106             ErrorEmbeddedNull,              /**< Error embedded null */
00107             ErrorParsingCData,              /**< Error parsing CDATA */
00108             ErrorDocumentTopOnly,           /**< Error document top only */
00109             ErrorStringCount                /**< Error string count, last entry: its value is the number of error codes above */
00110         };
00111 
00112         /**
00113         *  @brief
00114         *    Only used by 'Attribute::Query'-functions
00115         */
00116         enum EQueryResult {
00117             Success,        /**< All went fine */
00118             NoAttribute,    /**< Attribute does not exist */
00119             WrongType       /**< Invalid attribute type */
00120         };
00121 
00122         /**
00123         *  @brief
00124         *    Used by the parsing routines
00125         */
00126         enum EEncoding {
00127             EncodingUnknown,    /**< Unknown encoding (default) */
00128             EncodingUTF8,       /**< UTF8 encoding */
00129             EncodingLegacy      /**< Legacy encoding */
00130         };
00131 
00132 
00133     //[-------------------------------------------------------]
00134     //[ Public static functions                               ]
00135     //[-------------------------------------------------------]
00136     public:
00137         /**
00138         *  @brief
00139         *    Return the current white space setting
00140         *
00141         *  @return
00142         *    'true' if white space condensed is set, else 'false'
00143         *
00144         *  @remarks
00145         *    The world does not agree on whether white space should be kept or
00146         *    not. In order to make everyone happy, these global, static functions
00147         *    are provided to set whether or not the parser will condense all white space
00148         *    into a single space or not. The default is to condense. Note that changing this
00149         *    value is not thread safe.
00150         */
00151         static inline bool IsWhiteSpaceCondensed();
00152 
00153         /**
00154         *  @brief
00155         *    Set white space handling
00156         *
00157         *  @param[in] bCondense
00158         *    'true' to condense white space, else 'false'
00159         *
00160         *  @see
00161         *    - IsWhiteSpaceCondensed()
00162         */
00163         static inline void SetCondenseWhiteSpace(bool bCondense);
00164 
00165 
00166     //[-------------------------------------------------------]
00167     //[ Public functions                                      ]
00168     //[-------------------------------------------------------]
00169     public:
00170         /**
00171         *  @brief
00172         *    Destructor
00173         */
00174         PLCORE_API virtual ~XmlBase();
00175 
00176         /**
00177         *  @brief
00178         *    Return the row position, in the original source file, of this node or attribute
00179         *
00180         *  @return
00181         *    Row position
00182         *
00183         *  @remarks
00184         *    The row and column are 1-based. (That is the first row and first column is
00185         *    1, 1). If the returned values are 0 or less, then the parser does not have
00186         *    a row and column value.
00187         *    Generally, the row and column value will be set when 'XmlDocument::Load()'
00188         *    or any 'XmlNode::Parse()' is called. It will NOT be set
00189         *    when the DOM was created from operator '>>'.
00190         *    The values reflect the initial load. Once the DOM is modified programmatically
00191         *    (by adding or changing nodes and attributes) the new values will NOT update to
00192         *    reflect changes in the document.
00193         *    There is a minor performance cost to computing the row and column. Computation
00194         *    can be disabled if 'XmlDocument::SetTabSize()' is called with 0 as the value.
00195         *
00196         *  @see
00197         *    - XmlDocument::SetTabSize()
00198         */
00199         inline int GetRow() const;
00200 
00201         /**
00202         *  @brief
00203         *    Return the column position, in the original source file, of this node or attribute
00204         *
00205         *  @return
00206         *    Column position
00207         *
00208         *  @see
00209         *    - GetRow()
00210         */
00211         inline int GetColumn() const;
00212 
00213         /**
00214         *  @brief
00215         *    Returns the user data
00216         *
00217         *  @return
00218         *    User data, can be a null pointer
00219         *
00220         *  @note
00221         *    - The user data is not used internally, it's really user only :)
00222         */
00223         inline void *GetUserData() const;
00224 
00225         /**
00226         *  @brief
00227         *    Sets the user data
00228         *
00229         *  @param[in] pUser
00230         *    User data, can be a null pointer
00231         *
00232         *  @see
00233         *    - GetUserData()
00234         */
00235         inline void SetUserData(void *pUser);
00236 
00237 
00238     //[-------------------------------------------------------]
00239     //[ Public virtual XmlBase functions                      ]
00240     //[-------------------------------------------------------]
00241     public:
00242         /**
00243         *  @brief
00244         *    Save function
00245         *
00246         *  @param[out] cFile
00247         *    File to write in, must be opened and writable
00248         *  @param[in]  nDepth
00249         *    Current depth
00250         *
00251         *  @return
00252         *    'true' if all went fine, else 'false'
00253         */
00254         virtual bool Save(File &cFile, uint32 nDepth = 0) = 0;
00255 
00256         /**
00257         *  @brief
00258         *    Output as string function
00259         *
00260         *  @param[in] nDepth
00261         *    Current depth
00262         *
00263         *  @return
00264         *    String containing the XML
00265         */
00266         virtual String ToString(uint32 nDepth = 0) const = 0;
00267 
00268         /**
00269         *  @brief
00270         *    Parse the given null terminated block of XML data
00271         *
00272         *  @param[in] pszData
00273         *    Null terminated XML data to parse, if a null pointer, an error will be returned
00274         *  @param[in] pData
00275         *    Parsing data, can be a null pointer
00276         *  @param[in] nEncoding
00277         *    Encoding
00278         *
00279         *  @return
00280         *    The pointer to the parameter 'pszData' if all went fine, else a null pointer
00281         *
00282         *  @remarks
00283         *    Passing in an encoding to this method (either 'EncodingLegacy' or
00284         *    'EncodingUTF8') will force the parser to use that encoding, regardless
00285         *    of what the parser might otherwise try to detect.
00286         */
00287         virtual const char *Parse(const char *pszData, XmlParsingData *pData = nullptr, EEncoding nEncoding = EncodingUnknown) = 0;
00288 
00289 
00290     //[-------------------------------------------------------]
00291     //[ Protected definitions                                 ]
00292     //[-------------------------------------------------------]
00293     protected:
00294         /**
00295         *  @brief
00296         *    Internal structure for tracking location of items in the XML file
00297         */
00298         struct Cursor {
00299             Cursor()        { nRow = nColumn = -1; }
00300             void Clear()    { nRow = nColumn = -1; }
00301 
00302             int nRow;       // 0 based, -1 when not set (see constructor and 'Clear()')
00303             int nColumn;    // 0 based, -1 when not set (see constructor and 'Clear()')
00304         };
00305 
00306         // Bunch of unicode info at:
00307         //      http://www.unicode.org/faq/utf_bom.html
00308         // Including the basis of this table, which determines the #bytes in the
00309         // sequence from the lead byte. 1 placed for invalid sequences --
00310         // although the result will be junk, pass it through as much as possible.
00311         // Beware of the non-characters in UTF-8:
00312         //              ef bb bf (Microsoft "lead bytes")
00313         //              ef bf be
00314         //              ef bf bf
00315         static const unsigned char UTF_LEAD_0 = 0xefU;
00316         static const unsigned char UTF_LEAD_1 = 0xbbU;
00317         static const unsigned char UTF_LEAD_2 = 0xbfU;
00318 
00319 
00320     //[-------------------------------------------------------]
00321     //[ Protected static functions                            ]
00322     //[-------------------------------------------------------]
00323     protected:
00324         static const char *SkipWhiteSpace(const char *pszData, EEncoding nEncoding);
00325         static bool IsWhiteSpace(char c);
00326         static bool IsWhiteSpace(int c);
00327 
00328         /**
00329         *  @brief
00330         *    Reads an XML name into the string provided
00331         *
00332         *  @param[in]  pszData
00333         *    Data
00334         *  @param[out] sName
00335         *    Read name
00336         *  @param[in] nEncoding
00337         *    Encoding
00338         *
00339         *  @return
00340         *    Returns a pointer just past the last character of the name, or a null pointer if the function has an error
00341         */
00342         static const char *ReadName(const char *pszData, String &sName, EEncoding nEncoding);
00343 
00344         /**
00345         *  @brief
00346         *    Reads text
00347         *
00348         *  @param[in]  pszData
00349         *    Where to start
00350         *  @param[out] sText
00351         *    The string read
00352         *  @param[in]  bTrimWhiteSpace
00353         *    Whether to keep the white space (NOTE(review): the parameter name suggests 'true' means trimming, confirm against the implementation)
00354         *  @param[in]  pszEndTag
00355         *    What ends this text
00356         *  @param[in]  bCaseInsensitive
00357         *    Whether to ignore case in the end tag
00358         *  @param[in]  nEncoding
00359         *    The current encoding
00360         *
00361         *  @return
00362         *    Returns a pointer past the given end tag
00363         *
00364         *  @note
00365         *    - Wickedly complex options, but it keeps the (sensitive) code in one place.
00366         */
00367         static const char *ReadText(const char *pszData, String &sText, bool bTrimWhiteSpace, const char *pszEndTag, bool bCaseInsensitive, EEncoding nEncoding);
00368 
00369         // If an entity has been found, transform it into a character.
00370         static const char *GetEntity(const char *pszData, char *pszValue, int &nLength, EEncoding nEncoding);
00371 
00372         // Get a character, while interpreting entities.
00373         // The length can be from 0 to 4 bytes.
00374         static const char *GetChar(const char *pszData, char *pszValue, int &nLength, EEncoding nEncoding);
00375 
00376         // Return true if the next characters in the stream are any of the endTag sequences.
00377         // Ignore case only works for English, and should only be relied on when comparing
00378         // to English words: StringEqual(pszData, "version", true) is fine.
00379         static bool StringEqual(const char *pszData, const char *pszTag, bool bIgnoreCase, EEncoding nEncoding);
00380 
00381         // None of these methods are reliable for any language except English.
00382         // Good for approximation, not great for accuracy.
00383         static int IsAlpha(unsigned char nByte);
00384         static int IsAlphaNum(unsigned char nByte);
00385         static int ToLower(int nValue, EEncoding nEncoding);
00386         static void ConvertUTF32ToUTF8(unsigned long nInput, char *pszOutput, int &nLength);
00387 
00388         // Expands entities in a string. Note this should not contain the tag's '<', '>', etc,
00389         // or they will be transformed into entities!
00390         static void EncodeString(const String &sInString, String &sOutString);
00391 
00392 
00393     //[-------------------------------------------------------]
00394     //[ Protected functions                                   ]
00395     //[-------------------------------------------------------]
00396     protected:
00397         /**
00398         *  @brief
00399         *    Default constructor
00400         */
00401         XmlBase();
00402 
00403 
00404     //[-------------------------------------------------------]
00405     //[ Protected data                                        ]
00406     //[-------------------------------------------------------]
00407     protected:
00408         void   *m_pUserData;    /**< User data, can be a null pointer */
00409         Cursor  m_cCursor;      /**< Cursor */
00410 
00411 
00412     //[-------------------------------------------------------]
00413     //[ Private functions                                     ]
00414     //[-------------------------------------------------------]
00415     private:
00416         /**
00417         *  @brief
00418         *    Copy constructor
00419         *
00420         *  @param[in] cSource
00421         *    Source to copy from
00422         */
00423         XmlBase(const XmlBase &cSource);
00424 
00425         /**
00426         *  @brief
00427         *    Copy operator
00428         *
00429         *  @param[in] cSource
00430         *    Source to copy from
00431         *
00432         *  @return
00433         *    Reference to this instance
00434         */
00435         XmlBase &operator =(const XmlBase &cSource);
00436 
00437 
00438     //[-------------------------------------------------------]
00439     //[ Private static data                                   ]
00440     //[-------------------------------------------------------]
00441     private:
00442         static const int utf8ByteTable[256];    /**< Table that returns, for a given lead byte, the total number of bytes in the UTF-8 sequence */
00443         struct Entity {
00444             String sString;     /**< Presumably the textual form of the entity - confirm against the table definition */
00445             char   nCharacter;  /**< Presumably the character the entity stands for - confirm against the table definition */
00446         };
00447         enum {
00448             NumOfEntities = 5   /**< Number of entries in 'sEntity' */
00449         };
00450         static Entity sEntity[NumOfEntities];   /**< Entity table with 'NumOfEntities' entries */
00451         static PLCORE_API bool bCondenseWhiteSpace; /**< White space condense setting (presumably the value behind 'IsWhiteSpaceCondensed()' and 'SetCondenseWhiteSpace()' - confirm in XmlBase.inl) */
00452 
00453 
00454 };
00455 
00456 
00457 //[-------------------------------------------------------]
00458 //[ Namespace                                             ]
00459 //[-------------------------------------------------------]
00460 } // PLCore
00461 
00462 
00463 //[-------------------------------------------------------]
00464 //[ Implementation                                        ]
00465 //[-------------------------------------------------------]
00466 #include "PLCore/Xml/XmlBase.inl"
00467 
00468 
00469 #endif // __PLCORE_XML_BASE_H__