PixelLightAPI
RegEx.h
Go to the documentation of this file.
00001 /*********************************************************\
00002  *  File: RegEx.h                                        *
00003  *
00004  *  Copyright (C) 2002-2012 The PixelLight Team (http://www.pixellight.org/)
00005  *
00006  *  This file is part of PixelLight.
00007  *
00008  *  PixelLight is free software: you can redistribute it and/or modify
00009  *  it under the terms of the GNU Lesser General Public License as published by
00010  *  the Free Software Foundation, either version 3 of the License, or
00011  *  (at your option) any later version.
00012  *
00013  *  PixelLight is distributed in the hope that it will be useful,
00014  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  *  GNU Lesser General Public License for more details.
00017  *
00018  *  You should have received a copy of the GNU Lesser General Public License
00019  *  along with PixelLight. If not, see <http://www.gnu.org/licenses/>.
00020 \*********************************************************/
00021 
00022 
00023 #ifndef __PLCORE_REGEX_H__
00024 #define __PLCORE_REGEX_H__
00025 #pragma once
00026 
00027 
00028 //[-------------------------------------------------------]
00029 //[ Includes                                              ]
00030 //[-------------------------------------------------------]
00031 #include "PLCore/String/String.h"
00032 #include "PLCore/Container/Array.h"
00033 #include "PLCore/Container/HashMap.h"
00034 
00035 
00036 //[-------------------------------------------------------]
00037 //[ Forward declarations                                  ]
00038 //[-------------------------------------------------------]
00039 struct real_pcre;                      // Opaque to this header, only referenced through the 'pcre' typedef below
00040 typedef struct real_pcre pcre;         // Pointee type of 'RegEx::m_pPCRE'
00041 typedef struct pcre_extra pcre_extra;  // Pointee type of 'RegEx::m_pExtra'
00042 
00043 
00044 //[-------------------------------------------------------]
00045 //[ Namespace                                             ]
00046 //[-------------------------------------------------------]
00047 namespace PLCore {
00048 
00049 
00050 //[-------------------------------------------------------]
00051 //[ Classes                                               ]
00052 //[-------------------------------------------------------]
00053 /**
00054 *  @brief
00055 *    Regular expression class (PCRE syntax)
00056 *
00057 *  @remarks
00058 *    Internally the PCRE library (http://www.pcre.org) is used -
00059 *    this class is only a wrapper interface.
00060 *    This class supports ASCII and UTF8 encoding. You can force a certain encoding by setting
00061 *    special flags like 'EncodingASCII' for forced ASCII encoding. If no such encoding flags
00062 *    are set by the user, the internal string format of the given regular expression is used
00063 *    as encoding.
00064 */
00065 class RegEx {
00066 
00067 
00068     //[-------------------------------------------------------]
00069     //[ Public definitions                                    ]
00070     //[-------------------------------------------------------]
00071     public:
00072         /**
00073         *  @brief
00074         *    Matching and encoding flags
00075         */
00076         enum EFlags {
00077             MatchCaseSensitive  = 1,    /**< Match is case sensitive */
00078             MatchGreedy         = 2,    /**< Match is greedy */
00079             EncodingASCII       = 4,    /**< ASCII encoding, 1 byte per character (American Standard Code for Information Interchange, 0-127 defined, above undefined) */
00080             EncodingUTF8        = 8,    /**< UTF8 encoding (8-bit Unicode Transformation Format) with 1 to 6 bytes to encode one Unicode character */
00081             Multiline           = 16,   /**< PCRE-documentation says: "The "start of line" and "end of line" constructs match immediately following or immediately before internal newlines in the subject string, respectively, as well as at the very start and end" */
00082             DotAll              = 32    /**< PCRE-documentation says: "A dot metacharacter in the pattern matches a character of any value, including one that indicates a newline" */
00083         };
00084 
00085 
00086     //[-------------------------------------------------------]
00087     //[ Public static functions                               ]
00088     //[-------------------------------------------------------]
00089     public:
00090         /**
00091         *  @brief
00092         *    Converts a given wildcard into a regular expression
00093         *
00094         *  @param[in] sWildcard
00095         *    String with wildcard to convert (for example "BeerNumber*")
00096         *
00097         *  @return
00098         *    The converted regular expression (for example "^BeerNumber.*$")
00099         */
00100         static PLCORE_API String WildcardToRegEx(const String &sWildcard);
00101 
00102 
00103     //[-------------------------------------------------------]
00104     //[ Public functions                                      ]
00105     //[-------------------------------------------------------]
00106     public:
00107         /**
00108         *  @brief
00109         *    Constructor
00110         */
00111         inline RegEx();
00112 
00113         /**
00114         *  @brief
00115         *    Constructor
00116         *
00117         *  @param[in] sExpression
00118         *    Regular expression
00119         *  @param[in] nFlags
00120         *    Processing mode and encoding (combination of EFlags values), case sensitive and greedy by default
00121         */
00122         inline RegEx(const String &sExpression, uint32 nFlags = MatchCaseSensitive | MatchGreedy);
00123 
00124         /**
00125         *  @brief
00126         *    Copy constructor
00127         *
00128         *  @param[in] cRegEx
00129         *    RegEx to copy
00130         */
00131         inline RegEx(const RegEx &cRegEx);
00132 
00133         /**
00134         *  @brief
00135         *    Destructor
00136         */
00137         PLCORE_API ~RegEx();
00138 
00139         /**
00140         *  @brief
00141         *    Assignment operator
00142         *
00143         *  @param[in] cRegEx
00144         *    RegEx to copy
00145         *
00146         *  @return
00147         *    Reference to this instance
00148         */
00149         inline RegEx &operator =(const RegEx &cRegEx);
00150 
00151         /**
00152         *  @brief
00153         *    Get regular expression
00154         *
00155         *  @return
00156         *    Regular expression
00157         */
00158         inline String GetExpression() const;
00159 
00160         /**
00161         *  @brief
00162         *    Set regular expression
00163         *
00164         *  @param[in] sExpression
00165         *    Regular expression
00166         *  @param[in] nFlags
00167         *    Processing mode and encoding (combination of EFlags values), case sensitive and greedy by default
00168         */
00169         PLCORE_API void SetExpression(const String &sExpression, uint32 nFlags = MatchCaseSensitive | MatchGreedy);
00170 
00171         /**
00172         *  @brief
00173         *    Check if the regular expression is valid
00174         *
00175         *  @return
00176         *    'true' if the regular expression is valid, else 'false'
00177         */
00178         inline bool IsValid() const;
00179 
00180         /**
00181         *  @brief
00182         *    Get mode and encoding flags
00183         *
00184         *  @return
00185         *    Mode and encoding flags (combination of EFlags values)
00186         */
00187         inline uint32 GetFlags() const;
00188 
00189         /**
00190         *  @brief
00191         *    Is the expression case sensitive?
00192         *
00193         *  @return
00194         *    'true', if the expression is treated case sensitive, else 'false'
00195         */
00196         inline bool IsCaseSensitive() const;
00197 
00198         /**
00199         *  @brief
00200         *    Is the expression greedy?
00201         *
00202         *  @return
00203         *    'true', if the matching is greedy, else 'false'
00204         */
00205         inline bool IsGreedy() const;
00206 
00207         /**
00208         *  @brief
00209         *    Is ASCII encoding used?
00210         *
00211         *  @return
00212         *    'true', if ASCII encoding, else 'false'
00213         */
00214         inline bool IsASCII() const;
00215 
00216         /**
00217         *  @brief
00218         *    Is UTF8 encoding used?
00219         *
00220         *  @return
00221         *    'true', if UTF8 encoding, else 'false'
00222         */
00223         inline bool IsUTF8() const;
00224 
00225         /**
00226         *  @brief
00227         *    Analyze the regular expression and store additional internal information
00228         *
00229         *  @remarks
00230         *    This function analyses and stores additional internal information about the
00231         *    regular expression, which is later used to speed up processing. So you should
00232         *    call this e.g. for rather complicated expressions and/or expressions that
00233         *    are used many times.
00234         */
00235         PLCORE_API void Study();
00236 
00237         /**
00238         *  @brief
00239         *    Check if a string matches the given regular expression
00240         *
00241         *  @param[in] sSubject
00242         *    String to test
00243         *  @param[in] nPosition
00244         *    Start byte position in the string, for instance a value received by 'GetPosition()'
00245         *
00246         *  @remarks
00247         *    'sSubject' is internally automatically converted into the internal string format this
00248         *    regular expression is using. You can for instance use 'IsASCII()' to check whether or
00249         *    not ASCII is used.
00250         *
00251         *  @return
00252         *    'true' if the string matches, else 'false'
00253         */
00254         PLCORE_API bool Match(const String &sSubject, uint32 nPosition = 0);
00255 
00256         /**
00257         *  @brief
00258         *    Get current byte position in the subject string
00259         *
00260         *  @return
00261         *    New byte position in the subject string after the last match
00262         */
00263         inline int GetPosition() const;
00264 
00265         /**
00266         *  @brief
00267         *    Get the results of the last check
00268         *
00269         *  @return
00270         *    List containing all matching groups
00271         */
00272         inline const Array<String> &GetResults() const;
00273 
00274         /**
00275         *  @brief
00276         *    Get a matching group from the last check
00277         *
00278         *  @param[in] nIndex
00279         *    Index of the matching group to return
00280         *
00281         *  @return
00282         *    The matching substring or ""
00283         */
00284         inline String GetResult(uint32 nIndex) const;
00285 
00286         /**
00287         *  @brief
00288         *    Get the results of the last check by name
00289         *
00290         *  @return
00291         *    Map containing all matching groups as pairs of name->substring
00292         */
00293         inline const Map<String, String> &GetNameResults() const;
00294 
00295         /**
00296         *  @brief
00297         *    Get a matching group from the last check by name
00298         *
00299         *  @param[in] sName
00300         *    Name of the matching group to return
00301         *
00302         *  @return
00303         *    The matching substring or ""
00304         */
00305         inline String GetNameResult(const String &sName) const;
00306 
00307         /**
00308         *  @brief
00309         *    Clear all results
00310         */
00311         inline void ClearResults();
00312 
00313 
00314     //[-------------------------------------------------------]
00315     //[ Private functions                                     ]
00316     //[-------------------------------------------------------]
00317     private:
00318         /**
00319         *  @brief
00320         *    Compiles the PCRE expression
00321         */
00322         void CompilePCRE();
00323 
00324         /**
00325         *  @brief
00326         *    Deletes the PCRE expression
00327         */
00328         void FreePCRE();
00329 
00330 
00331     //[-------------------------------------------------------]
00332     //[ Private data                                          ]
00333     //[-------------------------------------------------------]
00334     private:
00335         // Regular expression
00336         String                   m_sExpression;     /**< Regular expression */
00337         pcre                    *m_pPCRE;           /**< PCRE expression, can be a null pointer */
00338         pcre_extra              *m_pExtra;          /**< PCRE extra data, can be a null pointer */
00339         uint32                   m_nFlags;          /**< Processing mode and encoding (combination of EFlags values) */
00340 
00341         // Matches
00342         Array<String>            m_lstGroups;       /**< List of substrings from the last match */
00343         HashMap<String, String>  m_mapGroups;       /**< Map of name->substrings pairs from the last match */
00344         int                      m_nPosition;       /**< Current byte position in the subject */
00345 
00346 
00347 };
00348 
00349 
00350 //[-------------------------------------------------------]
00351 //[ Namespace                                             ]
00352 //[-------------------------------------------------------]
00353 } // PLCore
00354 
00355 
00356 //[-------------------------------------------------------]
00357 //[ Implementation                                        ]
00358 //[-------------------------------------------------------]
00359 #include "PLCore/String/RegEx.inl"
00360 
00361 
00362 #endif // __PLCORE_REGEX_H__


PixelLight 0.9.11-R1
Copyright (C) 2002-2012 by The PixelLight Team
Last modified Thu Feb 23 2012 14:08:58
The content of this PixelLight document is published under the
Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported license.