PixelLightAPI  .
UTF8Tools.h
Go to the documentation of this file.
00001 /*********************************************************\
00002  *  File: UTF8Tools.h                                    *
00003  *
00004  *  Copyright (C) 2002-2012 The PixelLight Team (http://www.pixellight.org/)
00005  *
00006  *  This file is part of PixelLight.
00007  *
00008  *  PixelLight is free software: you can redistribute it and/or modify
00009  *  it under the terms of the GNU Lesser General Public License as published by
00010  *  the Free Software Foundation, either version 3 of the License, or
00011  *  (at your option) any later version.
00012  *
00013  *  PixelLight is distributed in the hope that it will be useful,
00014  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
00016  *  GNU Lesser General Public License for more details.
00017  *
00018  *  You should have received a copy of the GNU Lesser General Public License
00019  *  along with PixelLight. If not, see <http://www.gnu.org/licenses/>.
00020 \*********************************************************/
00021 
00022 
00023 #ifndef __PLCORE_UTF8TOOLS_H__
00024 #define __PLCORE_UTF8TOOLS_H__
00025 #pragma once
00026 
00027 
00028 //[-------------------------------------------------------]
00029 //[ Includes                                              ]
00030 //[-------------------------------------------------------]
00031 #include "PLCore/PLCore.h"
00032 
00033 
00034 //[-------------------------------------------------------]
00035 //[ Namespace                                             ]
00036 //[-------------------------------------------------------]
00037 namespace PLCore {
00038 
00039 
00040 //[-------------------------------------------------------]
00041 //[ Classes                                               ]
00042 //[-------------------------------------------------------]
00043 /**
00044 *  @brief
00045 *    Static class with UTF8 tool functions (character checks, navigation, counting, searching, conversion, escaping, comparison and copying)
00046 */
00047 class UTF8Tools {
00048 
00049 
00050     //[-------------------------------------------------------]
00051     //[ Public static functions                               ]
00052     //[-------------------------------------------------------]
00053     public:
00054         /**
00055         *  @brief
00056         *    Returns whether the given character is an octal digit
00057         *
00058         *  @param[in] nCharacter
00059         *    Character to check
00060         *
00061         *  @return
00062         *    'true' if the given character is an octal digit, else 'false'
00063         */
00064         static inline bool IsOctalDigit(char nCharacter);
00065 
00066         /**
00067         *  @brief
00068         *    Returns whether the given character is a hex digit
00069         *
00070         *  @param[in] nCharacter
00071         *    Character to check
00072         *
00073         *  @return
00074         *    'true' if the given character is a hex digit, else 'false'
00075         */
00076         static inline bool IsHexDigit(char nCharacter);
00077 
00078         /**
00079         *  @brief
00080         *    Returns whether the given character is the start of a UTF8 sequence or not
00081         *
00082         *  @param[in] nCharacter
00083         *    Character to check
00084         *
00085         *  @return
00086         *    'true' if the given character is the start of a UTF8 sequence, else 'false'
00087         */
00088         static inline bool IsSequenceStart(char nCharacter);
00089 
00090         /**
00091         *  @brief
00092         *    Returns the number of bytes a given character requires
00093         *
00094         *  @param[in] nCharacter
00095         *    Character to check
00096         *
00097         *  @return
00098         *    Returns the number of bytes the given character requires
00099         */
00100         static PLCORE_API uint8 GetNumOfCharacterBytes(char nCharacter);
00101 
00102         /**
00103         *  @brief
00104         *    Returns the number of bytes a given wide character requires
00105         *
00106         *  @param[in] nWideCharacter
00107         *    Wide character to check
00108         *
00109         *  @return
00110         *    Returns the number of bytes the given wide character requires
00111         */
00112         static PLCORE_API uint8 GetNumOfCharacterBytes(wchar_t nWideCharacter);
00113 
00114         /**
00115         *  @brief
00116         *    Returns a character as wide character
00117         *
00118         *  @param[in] pnCharacter
00119         *    Pointer to character to return, if a null pointer, '\0' will be returned
00120         *
00121         *  @return
00122         *    The character as wide character
00123         */
00124         static PLCORE_API wchar_t GetWideCharacter(const char *pnCharacter);
00125 
00126         /**
00127         *  @brief
00128         *    Returns the next character as wide character
00129         *
00130         *  @param[in,out] ppszString
00131         *    String where to return the next wide character from, MUST be valid!
00132         *
00133         *  @return
00134         *    The next character as wide character
00135         *
00136         *  @note
00137         *    - The given string pointer is updated
00138         */
00139         static PLCORE_API wchar_t GetNextWideCharacter(const char **ppszString);
00140 
00141         /**
00142         *  @brief
00143         *    Moves to the next character
00144         *
00145         *  @param[in] ppszString
00146         *    String where to move to the next character, MUST be valid!
00147         *
00148         *  @return
00149         *    Number of skipped bytes
00150         */
00151         static PLCORE_API uint8 MoveToNextCharacter(const char **ppszString);
00152 
00153         /**
00154         *  @brief
00155         *    Moves to the previous character
00156         *
00157         *  @param[in] ppszString
00158         *    String where to move to the previous character, MUST be valid!
00159         *
00160         *  @return
00161         *    Number of skipped bytes
00162         */
00163         static PLCORE_API uint8 MoveToPreviousCharacter(const char **ppszString);
00164 
00165         /**
00166         *  @brief
00167         *    Character index => byte offset where the character starts within the given UTF8 string
00168         *
00169         *  @param[in] pszString
00170         *    Pointer to string to use, if a null pointer, '0' will be returned
00171         *  @param[in] nCharacterIndex
00172         *    Character index, MUST be valid!
00173         *
00174         *  @return
00175         *    Byte offset
00176         */
00177         static PLCORE_API uint32 CharacterIndexToByteOffset(const char *pszString, uint32 nCharacterIndex);
00178 
00179         /**
00180         *  @brief
00181         *    Byte offset => character index where the character starts within the given UTF8 string
00182         *
00183         *  @param[in] pszString
00184         *    Pointer to string to use, if a null pointer, '0' will be returned
00185         *  @param[in] nOffset
00186         *    Byte offset, MUST be valid!
00187         *
00188         *  @return
00189         *    Character index
00190         */
00191         static PLCORE_API uint32 ByteOffsetToCharacterIndex(const char *pszString, uint32 nOffset);
00192 
00193         /**
00194         *  @brief
00195         *    Returns the number of bytes a given string requires
00196         *
00197         *  @param[in] pszString
00198         *    Pointer to string to check, if a null pointer, '0' will be returned
00199         *  @param[in] nCount
00200         *    Number of string characters to take into account, if 0, take all characters into account
00201         *
00202         *  @return
00203         *    Returns the number of bytes the given string requires (excluding the terminating zero)
00204         */
00205         static PLCORE_API uint32 GetNumOfStringBytes(const char *pszString, uint32 nCount = 0);
00206 
00207         /**
00208         *  @brief
00209         *    Returns the number of characters within a given string
00210         *
00211         *  @param[in] pszString
00212         *    Pointer to string to check, if a null pointer, '0' will be returned
00213         *
00214         *  @return
00215         *    The number of characters within a given string (excluding the terminating zero)
00216         */
00217         static PLCORE_API uint32 GetNumOfCharacters(const char *pszString);
00218 
00219         /**
00220         *  @brief
00221         *    Returns the number of characters within a given string
00222         *
00223         *  @param[in] pszString
00224         *    Pointer to string to check, if a null pointer, '0' will be returned
00225         *  @param[in] nNumOfBytes
00226         *    String size in bytes, MUST be valid!
00227         *
00228         *  @return
00229         *    The number of characters within a given string (excluding the terminating zero)
00230         */
00231         static PLCORE_API uint32 GetNumOfCharacters(const char *pszString, uint32 nNumOfBytes);
00232 
00233         /**
00234         *  @brief
00235         *    Returns the number of characters and bytes within a given string
00236         *
00237         *  @param[in]  pszString
00238         *    Pointer to string to check, if a null pointer, '0' will be returned
00239         *  @param[out] nNumOfBytes
00240         *    Receives the string size in bytes
00241         *
00242         *  @return
00243         *    The number of characters within a given string (excluding the terminating zero)
00244         */
00245         static PLCORE_API uint32 GetNumOfCharactersAndBytes(const char *pszString, uint32 &nNumOfBytes);
00246 
00247         /**
00248         *  @brief
00249         *    Returns a pointer to the first occurrence of the given wide character in the given string, or a null pointer if not found
00250         *
00251         *  @param[in]  pszString
00252         *    Pointer to string to use, if a null pointer, a null pointer will be returned
00253         *  @param[in]  nWideCharacter
00254         *    Wide character to find
00255         *  @param[out] pnCharacterIndex
00256         *    Receives the character index of the found character, if not a null pointer
00257         *
00258         *  @return
00259         *    Pointer to the first occurrence of the given wide character in the given string, or a null pointer if not found
00260         */
00261         static PLCORE_API const char *FindCharacter(const char *pszString, wchar_t nWideCharacter, uint32 *pnCharacterIndex);
00262 
00263         /**
00264         *  @brief
00265         *    Same as the above, but searches a buffer of a given size instead of a zero-terminated string
00266         *
00267         *  @param[in]  pszString
00268         *    Pointer to string to use, if a null pointer, a null pointer will be returned
00269         *  @param[in]  nNumOfBytes
00270         *    String size in bytes, MUST be valid!
00271         *  @param[in]  nWideCharacter
00272         *    Wide character to find
00273         *  @param[out] pnCharacterIndex
00274         *    Receives the character index of the found character, if not a null pointer
00275         *
00276         *  @return
00277         *    Pointer to the first occurrence of the given character in the given string, or a null pointer if not found
00278         */
00279         static PLCORE_API const char *FindCharacter(const char *pszString, uint32 nNumOfBytes, wchar_t nWideCharacter, uint32 *pnCharacterIndex);
00280 
00281         /**
00282         *  @brief
00283         *    Single wide character to UTF8
00284         *
00285         *  @param[out] pszDestination
00286         *    Receives the converted character, MUST be large enough to keep the result! If a null pointer, '0' will be returned.
00287         *  @param[in]  nWideCharacter
00288         *    Wide character to convert
00289         *
00290         *  @return
00291         *    Returns the number of bytes of the converted wide character
00292         *
00293         *  @note
00294         *    - Use GetNumOfCharacterBytes() to get the number of bytes a wide character requires
00295         */
00296         static PLCORE_API uint8 FromWideCharacter(char *pszDestination, wchar_t nWideCharacter);
00297 
00298         /**
00299         *  @brief
00300         *    Wide character string to UTF8
00301         *
00302         *  @param[out] pszDestination
00303         *    Receives the converted string, MUST be large enough to hold the result or a null pointer!
00304         *  @param[in]  nNumOfBytes
00305         *    Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer!
00306         *    (+1 if a terminating zero should be added)
00307         *  @param[in]  pszSource
00308         *    Pointer to wide character string to convert, if a null pointer, '0' will be returned and 'destination' is not manipulated
00309         *  @param[in]  nSourceLength
00310         *    Number of source characters, or '0' if zero-terminated
00311         *
00312         *  @return
00313         *    The number of used destination bytes (excluding the terminating zero)
00314         *
00315         *  @note
00316         *    - If 'pszDestination' is a null pointer, the resulting length in bytes of the converted string
00317         *      (excluding the terminating zero) is returned
00318         */
00319         static PLCORE_API uint32 FromWideCharacterString(char *pszDestination, uint32 nNumOfBytes, const wchar_t *pszSource, uint32 nSourceLength);
00320 
00321         /**
00322         *  @brief
00323         *    UTF8 to wide character string
00324         *
00325         *  @param[out] pszDestination
00326         *    Receives the converted string, MUST be large enough to hold the result! If a null pointer, '0' will be returned.
00327         *  @param[in]  nLength
00328         *    Length of the destination buffer, MUST be valid! (including the terminating zero)
00329         *  @param[in]  pszSource
00330         *    Pointer to UTF8 string to convert, if a null pointer, '0' will be returned and 'destination' is not manipulated
00331         *  @param[in]  nSourceNumOfBytes
00332         *    Number of source bytes to process, or '0' if zero-terminated
00333         *
00334         *  @return
00335         *    Number of converted characters
00336         *
00337         *  @note
00338         *    - Only works for valid UTF8, i.e. no 5- or 6-byte sequences
00339         */
00340         static PLCORE_API uint32 ToWideCharacterString(wchar_t *pszDestination, uint32 nLength, const char *pszSource, uint32 nSourceNumOfBytes);
00341 
00342         /**
00343         *  @brief
00344         *    Given a wide character, convert it to an ASCII escape sequence stored in the given destination buffer
00345         *
00346         *  @param[out] pszDestination
00347         *    Will receive the converted result, MUST be large enough to hold the result or a null pointer!
00348         *  @param[in]  nNumOfBytes
00349         *    Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer!
00350         *  @param[in]  nWideCharacter
00351         *    Wide character to convert
00352         *
00353         *  @return
00354         *    Number of characters within the destination buffer (excluding the terminating zero)
00355         *
00356         *  @note
00357         *    - If 'pszDestination' is a null pointer, the resulting length in bytes of the converted string
00358         *      (excluding the terminating zero) is returned
00359         */
00360         static PLCORE_API uint32 EscapeWideCharacter(char *pszDestination, uint32 nNumOfBytes, wchar_t nWideCharacter);
00361 
00362         /**
00363         *  @brief
00364         *    Converts a UTF8 string to an ASCII string with escape sequences
00365         *
00366         *  @param[out] pszDestination
00367         *    Will receive the converted result, MUST be large enough to hold the result or a null pointer!
00368         *  @param[in]  nNumOfBytes
00369         *    Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer!
00370         *  @param[in]  pszSource
00371         *    Pointer to source to read from, if a null pointer, '0' will be returned and 'destination' is not manipulated
00372         *  @param[in]  bEscapeQuotes
00373         *    If 'true', quote characters will be preceded by backslashes as well
00374         *
00375         *  @return
00376         *    Number of characters within the destination buffer (excluding the terminating zero)
00377         *
00378         *  @note
00379         *    - If 'pszDestination' is a null pointer, the resulting length in bytes of the converted string
00380         *      (excluding the terminating zero) is returned
00381         */
00382         static PLCORE_API uint32 Escape(char *pszDestination, uint32 nNumOfBytes, const char *pszSource, bool bEscapeQuotes);
00383 
00384         /**
00385         *  @brief
00386         *    Assuming 'pszSource' points to the character after a backslash, read an escape sequence,
00387         *    storing the result in 'nDestination' and returning the number of input characters processed
00388         *
00389         *  @param[out] nDestination
00390         *    Will receive the result
00391         *  @param[in]  pszSource
00392         *    Pointer to source to read from, if a null pointer, '0' will be returned and 'destination' is not manipulated
00393         *
00394         *  @return
00395         *    Number of read characters (excluding the terminating zero)
00396         */
00397         static PLCORE_API uint32 ReadEscapeSequence(wchar_t &nDestination, const char *pszSource);
00398 
00399         /**
00400         *  @brief
00401         *    Converts a string with literal \uxxxx or \Uxxxxxxxx characters to UTF8
00402         *
00403         *  @param[out] pszDestination
00404         *    Will receive the converted result, MUST be large enough to hold the result or a null pointer!
00405         *  @param[in]  nNumOfBytes
00406         *    Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer!
00407         *    (+1 if a terminating zero should be added)
00408         *  @param[in]  pszSource
00409         *    Pointer to source string to convert, if a null pointer, '0' will be returned and 'destination' is not manipulated
00410         *
00411         *  @return
00412         *    The resulting length in bytes of the converted string (excluding the terminating zero)
00413         *
00414         *  @note
00415         *    - If 'pszDestination' is a null pointer, the resulting length in bytes of the converted string
00416         *      (excluding the terminating zero) is returned
00417         */
00418         static PLCORE_API uint32 Unescape(char *pszDestination, uint32 nNumOfBytes, const char *pszSource);
00419 
00420         /**
00421         *  @brief
00422         *    Compares two UTF8 strings (case sensitive)
00423         *
00424         *  @param[in] pszFirst
00425         *    Pointer to first string for comparison, if a null pointer, '0' will be returned
00426         *  @param[in] pszSecond
00427         *    Pointer to second string for comparison, if a null pointer, '0' will be returned
00428         *  @param[in] nCount
00429         *    Number of characters to compare, can be 0 if the whole strings should be compared
00430         *
00431         *  @return
00432         *    -1 if the first string is smaller than the second one
00433         *     0 if both strings are equal
00434         *     1 if the first string is greater than the second one
00435         */
00436         static PLCORE_API int Compare(const char *pszFirst, const char *pszSecond, uint32 nCount = 0);
00437 
00438         /**
00439         *  @brief
00440         *    Find a substring in a UTF8 string
00441         *
00442         *  @param[in] pszSource
00443         *    Pointer to the string which should be searched through, if a null pointer, a null pointer will be returned
00444         *  @param[in] pszSubstring
00445         *    Pointer to the substring to be searched for, if a null pointer, a null pointer will be returned
00446         *
00447         *  @return
00448         *    A pointer to the start position of the substring or a null pointer if the search fails
00449         */
00450         static PLCORE_API const char *FindSubstring(const char *pszSource, const char *pszSubstring);
00451 
00452         /**
00453         *  @brief
00454         *    Copies the given UTF8 source string into the destination string
00455         *
00456         *  @param[out] pszDestination
00457         *    Will receive the copied string, if a null pointer, a null pointer will be returned
00458         *  @param[in]  pszSource
00459         *    Pointer to the source string, if a null pointer, a null pointer will be returned
00460         *  @param[in]  nCount
00461         *    Number of characters to copy, can be 0 if the whole string should be copied
00462         *
00463         *  @return
00464         *    A pointer to 'pszDestination' (always valid!)
00465         *
00466         *  @note
00467         *    - If 'nCount' is 0 or the source string is shorter than 'nCount' then the '\0'-terminator is also copied
00468         */
00469         static PLCORE_API char *Copy(char *pszDestination, const char *pszSource, uint32 nCount = 0);
00470 
00471 
00472 };
00473 
00474 
00475 //[-------------------------------------------------------]
00476 //[ Namespace                                             ]
00477 //[-------------------------------------------------------]
00478 } // PLCore
00479 
00480 
00481 //[-------------------------------------------------------]
00482 //[ Implementation                                        ]
00483 //[-------------------------------------------------------]
00484 #include "PLCore/String/UTF8Tools.inl"
00485 
00486 
00487 #endif // __PLCORE_UTF8TOOLS_H__


PixelLight PixelLight 0.9.11-R1
Copyright (C) 2002-2012 by The PixelLight Team
Last modified Thu Feb 23 2012 14:09:01
The content of this PixelLight document is published under the
Creative Commons Attribution-NonCommercial-ShareAlike 3.0 Unported