PixelLightAPI
.
|
00001 /*********************************************************\ 00002 * File: RegEx.h * 00003 * 00004 * Copyright (C) 2002-2012 The PixelLight Team (http://www.pixellight.org/) 00005 * 00006 * This file is part of PixelLight. 00007 * 00008 * PixelLight is free software: you can redistribute it and/or modify 00009 * it under the terms of the GNU Lesser General Public License as published by 00010 * the Free Software Foundation, either version 3 of the License, or 00011 * (at your option) any later version. 00012 * 00013 * PixelLight is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 * GNU Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public License 00019 * along with PixelLight. If not, see <http://www.gnu.org/licenses/>. 00020 \*********************************************************/ 00021 00022 00023 #ifndef __PLCORE_REGEX_H__ 00024 #define __PLCORE_REGEX_H__ 00025 #pragma once 00026 00027 00028 //[-------------------------------------------------------] 00029 //[ Includes ] 00030 //[-------------------------------------------------------] 00031 #include "PLCore/String/String.h" 00032 #include "PLCore/Container/Array.h" 00033 #include "PLCore/Container/HashMap.h" 00034 00035 00036 //[-------------------------------------------------------] 00037 //[ Forward declarations ] 00038 //[-------------------------------------------------------] 00039 struct real_pcre; 00040 typedef struct real_pcre pcre; 00041 typedef struct pcre_extra pcre_extra; 00042 00043 00044 //[-------------------------------------------------------] 00045 //[ Namespace ] 00046 //[-------------------------------------------------------] 00047 namespace PLCore { 00048 00049 00050 //[-------------------------------------------------------] 00051 //[ Classes ] 00052 //[-------------------------------------------------------] 00053 /** 00054 * @brief 00055 * Regular expression class (PCRE syntax) 00056 * 00057 * @remarks 00058 * Internally the PCRE library (http://www.pcre.org) is used - 00059 * this here is only a wrapper interface. 00060 * This class supports ASCII and UTF8 encoding. You can force a certain encoding by setting 00061 * special flags like 'EncodingASCII' for forced ASCII encoding. If no such encoding flags 00062 * are set by the user, the internal string format of the given regular expression is used 00063 * as encoding. 00064 */ 00065 class RegEx { 00066 00067 00068 //[-------------------------------------------------------] 00069 //[ Public definitions ] 00070 //[-------------------------------------------------------] 00071 public: 00072 /** 00073 * @brief 00074 * Matching and encoding flags 00075 */ 00076 enum EFlags { 00077 MatchCaseSensitive = 1, /**< Match is case sensitive */ 00078 MatchGreedy = 2, /**< Match is greedy */ 00079 EncodingASCII = 4, /**< ASCII encoding, 1 byte per character (American Standard Code for Information Interchange, 0-128 defined, above undefined) */ 00080 EncodingUTF8 = 8, /**< UTF8 encoding (8-bit Unicode Transformation Format) with 1 to 6 bytes to encode one Unicode character */ 00081 Multiline = 16, /**< PCRE-documentation says: "The "start of line" and "end of line" constructs match immediately following or immediately before internal newlines in the subject string, respectively, as well as at the very start and end" */ 00082 DotAll = 32 /**< PCRE-documentation says: "A dot metacharacter in the pattern matches a character of any value, including one that indicates a newline" */ 00083 }; 00084 00085 00086 //[-------------------------------------------------------] 00087 //[ Public static functions ] 00088 //[-------------------------------------------------------] 00089 public: 00090 /** 00091 * @brief 00092 * Converts a given wildcard into a regular expression 00093 * 00094 * @param[in] sWildcard 00095 * String with wildcard to convert (for example "BeerNumber*") 00096 * 00097 * @return 00098 * The converted regular expression (for example "^BeerNumber.*$") 00099 */ 00100 static PLCORE_API String WildcardToRegEx(const String &sWildcard); 00101 00102 00103 //[-------------------------------------------------------] 00104 //[ Public functions ] 00105 //[-------------------------------------------------------] 00106 public: 00107 /** 00108 * @brief 00109 * Constructor 00110 */ 00111 inline RegEx(); 00112 00113 /** 00114 * @brief 00115 * Constructor 00116 * 00117 * @param[in] sExpression 00118 * Regular expression 00119 * @param[in] nFlags 00120 * Processing mode and encoding (combination of EFlags values) 00121 */ 00122 inline RegEx(const String &sExpression, uint32 nFlags = MatchCaseSensitive | MatchGreedy); 00123 00124 /** 00125 * @brief 00126 * Copy constructor 00127 * 00128 * @param[in] cRegEx 00129 * RegEx to copy 00130 */ 00131 inline RegEx(const RegEx &cRegEx); 00132 00133 /** 00134 * @brief 00135 * Destructor 00136 */ 00137 PLCORE_API ~RegEx(); 00138 00139 /** 00140 * @brief 00141 * Assignment operator 00142 * 00143 * @param[in] cRegEx 00144 * RegEx to copy 00145 * 00146 * @return 00147 * Reference to this instance 00148 */ 00149 inline RegEx &operator =(const RegEx &cRegEx); 00150 00151 /** 00152 * @brief 00153 * Get regular expression 00154 * 00155 * @return 00156 * Regular expression 00157 */ 00158 inline String GetExpression() const; 00159 00160 /** 00161 * @brief 00162 * Set regular expression 00163 * 00164 * @param[in] sExpression 00165 * Regular expression 00166 * @param[in] nFlags 00167 * Processing mode and encoding (combination of EFlags values) 00168 */ 00169 PLCORE_API void SetExpression(const String &sExpression, uint32 nFlags = MatchCaseSensitive | MatchGreedy); 00170 00171 /** 00172 * @brief 00173 * Check if the regular expression is valid 00174 * 00175 * @return 00176 * Regular expression 00177 */ 00178 inline bool IsValid() const; 00179 00180 /** 00181 * @brief 00182 * Get mode and encoding flags 00183 * 00184 * @return 00185 * Mode and encoding flags (combination of EFlags values) 00186 */ 00187 inline uint32 GetFlags() const; 00188 00189 /** 00190 * @brief 00191 * Is the expression case sensitive? 00192 * 00193 * @return 00194 * 'true', if the expression is treated case sensitive, else 'false' 00195 */ 00196 inline bool IsCaseSensitive() const; 00197 00198 /** 00199 * @brief 00200 * Is the expression greedy? 00201 * 00202 * @return 00203 * 'true', if the matching is greedy, else 'false' 00204 */ 00205 inline bool IsGreedy() const; 00206 00207 /** 00208 * @brief 00209 * Is ASCII encoding used? 00210 * 00211 * @return 00212 * 'true', if ASCII encoding, else 'false' 00213 */ 00214 inline bool IsASCII() const; 00215 00216 /** 00217 * @brief 00218 * Is UTF8 encoding used? 00219 * 00220 * @return 00221 * 'true', if UTF8 encoding, else 'false' 00222 */ 00223 inline bool IsUTF8() const; 00224 00225 /** 00226 * @brief 00227 * Analyze the regular expression and store additional internal information 00228 * 00229 * @remarks 00230 * This function analyses and stores additional internal information about the 00231 * regular expression, which is later used to speed up processing. So you should 00232 * call this e.g. for rather complicated expressions and/or expressions that 00233 * are used many times. 00234 */ 00235 PLCORE_API void Study(); 00236 00237 /** 00238 * @brief 00239 * Check if a string matches the given regular expression 00240 * 00241 * @param[in] sSubject 00242 * String to test 00243 * @param[in] nPosition 00244 * Start byte position in the string, for instance a value received by 'GetPosition()' 00245 * 00246 * @remarks 00247 * 'sSubject' is internally automatically converted into the internal string format this 00248 * regular expression is using. You can for instance use 'IsASCII()' to check whether or 00249 * not ASCII is used. 00250 * 00251 * @return 00252 * 'true' if the string matches, else 'false' 00253 */ 00254 PLCORE_API bool Match(const String &sSubject, uint32 nPosition = 0); 00255 00256 /** 00257 * @brief 00258 * Get current byte position in the subject string 00259 * 00260 * @return 00261 * New byte position in the subject string after the last match 00262 */ 00263 inline int GetPosition() const; 00264 00265 /** 00266 * @brief 00267 * Get the results of the last check 00268 * 00269 * @return 00270 * List containing all matching groups 00271 */ 00272 inline const Array<String> &GetResults() const; 00273 00274 /** 00275 * @brief 00276 * Get a matching group from the last check 00277 * 00278 * @param[in] nIndex 00279 * Which group shall be returned? 00280 * 00281 * @return 00282 * The matching substring or "" 00283 */ 00284 inline String GetResult(uint32 nIndex) const; 00285 00286 /** 00287 * @brief 00288 * Get the results of the last check by name 00289 * 00290 * @return 00291 * Map containing all matching groups as pairs of name->substring 00292 */ 00293 inline const Map<String, String> &GetNameResults() const; 00294 00295 /** 00296 * @brief 00297 * Get a matching group from the last check by name 00298 * 00299 * @param[in] sName 00300 * Which group shall be returned? 00301 * 00302 * @return 00303 * The matching substring or "" 00304 */ 00305 inline String GetNameResult(const String &sName) const; 00306 00307 /** 00308 * @brief 00309 * Clear all results 00310 */ 00311 inline void ClearResults(); 00312 00313 00314 //[-------------------------------------------------------] 00315 //[ Private functions ] 00316 //[-------------------------------------------------------] 00317 private: 00318 /** 00319 * @brief 00320 * Compiles the PCRE expression 00321 */ 00322 void CompilePCRE(); 00323 00324 /** 00325 * @brief 00326 * Deletes the PCRE expression 00327 */ 00328 void FreePCRE(); 00329 00330 00331 //[-------------------------------------------------------] 00332 //[ Private data ] 00333 //[-------------------------------------------------------] 00334 private: 00335 // Regular expression 00336 String m_sExpression; /**< Regular expression */ 00337 pcre *m_pPCRE; /**< PCRE expression, can be a null pointer */ 00338 pcre_extra *m_pExtra; /**< PCRE extra data, can be a null pointer */ 00339 uint32 m_nFlags; /**< Processing mode and encoding (combination of EFlags values) */ 00340 00341 // Matches 00342 Array<String> m_lstGroups; /**< List of substrings from the last match */ 00343 HashMap<String, String> m_mapGroups; /**< Map of name->substrings pairs from the last match */ 00344 int m_nPosition; /**< Current byte position in the subject */ 00345 00346 00347 }; 00348 00349 00350 //[-------------------------------------------------------] 00351 //[ Namespace ] 00352 //[-------------------------------------------------------] 00353 } // PLCore 00354 00355 00356 //[-------------------------------------------------------] 00357 //[ Implementation ] 00358 //[-------------------------------------------------------] 00359 #include "PLCore/String/RegEx.inl" 00360 00361 00362 #endif // __PLCORE_REGEX_H__
|