PixelLightAPI
.
|
00001 /*********************************************************\ 00002 * File: HTMLParser.h * 00003 * 00004 * Copyright (C) 2002-2012 The PixelLight Team (http://www.pixellight.org/) 00005 * 00006 * This file is part of PixelLight. 00007 * 00008 * PixelLight is free software: you can redistribute it and/or modify 00009 * it under the terms of the GNU Lesser General Public License as published by 00010 * the Free Software Foundation, either version 3 of the License, or 00011 * (at your option) any later version. 00012 * 00013 * PixelLight is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 * GNU Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public License 00019 * along with PixelLight. If not, see <http://www.gnu.org/licenses/>. 00020 \*********************************************************/ 00021 00022 00023 #ifndef __PLCORE_HTMLPARSER_H__ 00024 #define __PLCORE_HTMLPARSER_H__ 00025 #pragma once 00026 00027 00028 //[-------------------------------------------------------] 00029 //[ Includes ] 00030 //[-------------------------------------------------------] 00031 #include "PLCore/String/String.h" 00032 00033 00034 //[-------------------------------------------------------] 00035 //[ Namespace ] 00036 //[-------------------------------------------------------] 00037 namespace PLCore { 00038 00039 00040 //[-------------------------------------------------------] 00041 //[ Forward declarations ] 00042 //[-------------------------------------------------------] 00043 class XmlDocument; 00044 00045 00046 //[-------------------------------------------------------] 00047 //[ Classes ] 00048 //[-------------------------------------------------------] 00049 /** 00050 * @brief 00051 * HTML parser 00052 * 00053 * @remarks 00054 * This class provides a simple HTML parser. It does NOT comply with any existent HTML standard, 00055 * but it tries to be as flexible as possible in allowing many typical errors such as mixed HTML 00056 * and XML style as well as not properly closed tags. So don't expect this class to be able of parsing 00057 * any existing homepage out there. However, the class can be used to read in an HTML file and 00058 * produce an XML syntax tree as if it were an XML file. You can then use standard XML classes to 00059 * access the parsed document tree. 00060 */ 00061 class HTMLParser { 00062 00063 00064 //[-------------------------------------------------------] 00065 //[ Public functions ] 00066 //[-------------------------------------------------------] 00067 public: 00068 /** 00069 * @brief 00070 * Constructor 00071 */ 00072 inline HTMLParser(); 00073 00074 /** 00075 * @brief 00076 * Destructor 00077 */ 00078 inline ~HTMLParser(); 00079 00080 /** 00081 * @brief 00082 * Clear all data 00083 */ 00084 PLCORE_API void Clear(); 00085 00086 /** 00087 * @brief 00088 * Load file 00089 * 00090 * @param[in] sFilename 00091 * Filename of the HTML document to load in 00092 */ 00093 PLCORE_API void Load(const String &sFilename); 00094 00095 /** 00096 * @brief 00097 * Get parsed HTML as an XML document 00098 * 00099 * @return 00100 * Pointer to XML document, a null pointer if there's currently no document 00101 */ 00102 inline XmlDocument *GetXML() const; 00103 00104 /** 00105 * @brief 00106 * Get number of errors occurred while parsing the HTML file 00107 * 00108 * @return 00109 * Number of errors 00110 */ 00111 inline uint32 GetNumOfErrors() const; 00112 00113 00114 //[-------------------------------------------------------] 00115 //[ Private functions ] 00116 //[-------------------------------------------------------] 00117 private: 00118 /** 00119 * @brief 00120 * Parse whole HTML file and generate a XML parsing tree 00121 * 00122 * @return 00123 * 'true' if HTML file could be parsed, 'false' on error 00124 */ 00125 bool Parse(); 00126 00127 /** 00128 * @brief 00129 * Check if there is another token waiting 00130 * 00131 * @return 00132 * 'true' if next token is available, else 'false' 00133 */ 00134 bool HasNextToken(); 00135 00136 /** 00137 * @brief 00138 * Get next token from HTML parser 00139 * 00140 * @return 00141 * Next token, or "" when there are no more tokens 00142 */ 00143 String GetNextToken(); 00144 00145 /** 00146 * @brief 00147 * Returns whether or not the given string is a single tag (no closing tag required to be well-formed!) 00148 * 00149 * @param[in] sTag 00150 * Tag to check 00151 * 00152 * @return 00153 * 'true' the given string is a single tag, else 'false' 00154 */ 00155 inline bool IsSingleTag(const String &sTag) const; 00156 00157 00158 //[-------------------------------------------------------] 00159 //[ Private data ] 00160 //[-------------------------------------------------------] 00161 private: 00162 String m_sFilename; /**< File name */ 00163 String m_sText; /**< Content of HTML file */ 00164 String m_sTextLower; /**< Text version in lower case */ 00165 uint32 m_nPos; /**< Parsing position */ 00166 String m_sToken; /**< Currently parsed token */ 00167 XmlDocument *m_pXML; /**< Parsed HTML, can be a null pointer */ 00168 uint32 m_nErrors; /**< Number of errors */ 00169 00170 00171 }; 00172 00173 00174 //[-------------------------------------------------------] 00175 //[ Namespace ] 00176 //[-------------------------------------------------------] 00177 } // PLCore 00178 00179 00180 //[-------------------------------------------------------] 00181 //[ Implementation ] 00182 //[-------------------------------------------------------] 00183 #include "PLCore/Tools/HTMLParser.inl" 00184 00185 00186 #endif // __PLCORE_HTMLPARSER_H__
|