PixelLightAPI
.
|
00001 /*********************************************************\ 00002 * File: UTF8Tools.h * 00003 * 00004 * Copyright (C) 2002-2012 The PixelLight Team (http://www.pixellight.org/) 00005 * 00006 * This file is part of PixelLight. 00007 * 00008 * PixelLight is free software: you can redistribute it and/or modify 00009 * it under the terms of the GNU Lesser General Public License as published by 00010 * the Free Software Foundation, either version 3 of the License, or 00011 * (at your option) any later version. 00012 * 00013 * PixelLight is distributed in the hope that it will be useful, 00014 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00015 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00016 * GNU Lesser General Public License for more details. 00017 * 00018 * You should have received a copy of the GNU Lesser General Public License 00019 * along with PixelLight. If not, see <http://www.gnu.org/licenses/>. 00020 \*********************************************************/ 00021 00022 00023 #ifndef __PLCORE_UTF8TOOLS_H__ 00024 #define __PLCORE_UTF8TOOLS_H__ 00025 #pragma once 00026 00027 00028 //[-------------------------------------------------------] 00029 //[ Includes ] 00030 //[-------------------------------------------------------] 00031 #include "PLCore/PLCore.h" 00032 00033 00034 //[-------------------------------------------------------] 00035 //[ Namespace ] 00036 //[-------------------------------------------------------] 00037 namespace PLCore { 00038 00039 00040 //[-------------------------------------------------------] 00041 //[ Classes ] 00042 //[-------------------------------------------------------] 00043 /** 00044 * @brief 00045 * Static class with UTF8 tool functions 00046 */ 00047 class UTF8Tools { 00048 00049 00050 //[-------------------------------------------------------] 00051 //[ Public static functions ] 00052 //[-------------------------------------------------------] 00053 public: 00054 /** 00055 * @brief 00056 * Returns whether the given character is an octal digit 00057 * 00058 * @param[in] nCharacter 00059 * Character to check 00060 * 00061 * @return 00062 * 'true' if the given character is an octal digit, else 'false' 00063 */ 00064 static inline bool IsOctalDigit(char nCharacter); 00065 00066 /** 00067 * @brief 00068 * Returns whether the given character is a hex digit 00069 * 00070 * @param[in] nCharacter 00071 * Character to check 00072 * 00073 * @return 00074 * 'true' if the given character is a hex digit, else 'false' 00075 */ 00076 static inline bool IsHexDigit(char nCharacter); 00077 00078 /** 00079 * @brief 00080 * Returns whether the given character is the start of an UTF8 sequence or not 00081 * 00082 * @param[in] nCharacter 00083 * Character to check 00084 * 00085 * @return 00086 * 'true' if the given character is the start of a UTF8 sequence, else 'false' 00087 */ 00088 static inline bool IsSequenceStart(char nCharacter); 00089 00090 /** 00091 * @brief 00092 * Returns the number of bytes a given character requires 00093 * 00094 * @param[in] nCharacter 00095 * Character to check 00096 * 00097 * @return 00098 * Returns the number of bytes the given character requires 00099 */ 00100 static PLCORE_API uint8 GetNumOfCharacterBytes(char nCharacter); 00101 00102 /** 00103 * @brief 00104 * Returns the number of bytes a given wide character requires 00105 * 00106 * @param[in] nWideCharacter 00107 * Wide character to check 00108 * 00109 * @return 00110 * Returns the number of bytes the given wide character requires 00111 */ 00112 static PLCORE_API uint8 GetNumOfCharacterBytes(wchar_t nWideCharacter); 00113 00114 /** 00115 * @brief 00116 * Returns a character as wide character 00117 * 00118 * @param[in] pnCharacter 00119 * Pointer to character to return, if a null pointer '\0' will be returned 00120 * 00121 * @return 00122 * The character as wide character 00123 */ 00124 static PLCORE_API wchar_t GetWideCharacter(const char *pnCharacter); 00125 00126 /** 00127 * @brief 00128 * Returns the next character as wide character 00129 * 00130 * @param[in] ppszString 00131 * String where to return the next wide character from, MUST be valid! 00132 * 00133 * @return 00134 * The next character as wide character 00135 * 00136 * @note 00137 * - The given string pointer is updated 00138 */ 00139 static PLCORE_API wchar_t GetNextWideCharacter(const char **ppszString); 00140 00141 /** 00142 * @brief 00143 * Moves to the next character 00144 * 00145 * @param[in] ppszString 00146 * String where to move to the next character, MUST be valid! 00147 * 00148 * @return 00149 * Number of skipped bytes 00150 */ 00151 static PLCORE_API uint8 MoveToNextCharacter(const char **ppszString); 00152 00153 /** 00154 * @brief 00155 * Moves to the previous character 00156 * 00157 * @param[in] ppszString 00158 * String where to move to the previous character, MUST be valid! 00159 * 00160 * @return 00161 * Number of skipped bytes 00162 */ 00163 static PLCORE_API uint8 MoveToPreviousCharacter(const char **ppszString); 00164 00165 /** 00166 * brief 00167 * Character index => byte offset were the character starts within the given UTF8 string 00168 * 00169 * @param[in] pszString 00170 * Pointer to string to use, if a null pointer '0' will be returned 00171 * @param[in] nCharacterIndex 00172 * Character index, MUST be valid! 00173 * 00174 * @return 00175 * Byte offset 00176 */ 00177 static PLCORE_API uint32 CharacterIndexToByteOffset(const char *pszString, uint32 nCharacterIndex); 00178 00179 /** 00180 * brief 00181 * Byte offset => character index were the character starts within the given UTF8 string 00182 * 00183 * @param[in] pszString 00184 * Pointer to string to use, if a null pointer '0' will be returned 00185 * @param[in] nOffset 00186 * Byte offset, MUST be valid! 00187 * 00188 * @return 00189 * Character index 00190 */ 00191 static PLCORE_API uint32 ByteOffsetToCharacterIndex(const char *pszString, uint32 nOffset); 00192 00193 /** 00194 * @brief 00195 * Returns the number of bytes a given string requires 00196 * 00197 * @param[in] pszString 00198 * Pointer to string to check, if a null pointer '0' will be returned 00199 * @param[in] nCount 00200 * Number of string character to take into account, if 0, take all characters into account 00201 * 00202 * @return 00203 * Returns the number of bytes the given string requires (excluding the terminating zero) 00204 */ 00205 static PLCORE_API uint32 GetNumOfStringBytes(const char *pszString, uint32 nCount = 0); 00206 00207 /** 00208 * @brief 00209 * Returns the number of characters within a given string 00210 * 00211 * @param[in] pszString 00212 * Pointer to string to check, if a null pointer '0' will be returned 00213 * 00214 * @return 00215 * The number of characters within a given string (excluding the terminating zero) 00216 */ 00217 static PLCORE_API uint32 GetNumOfCharacters(const char *pszString); 00218 00219 /** 00220 * @brief 00221 * Returns the number of characters within a given string 00222 * 00223 * @param[in] pszString 00224 * Pointer to string to check, if a null pointer '0' will be returned 00225 * @param[in] nNumOfBytes 00226 * String size in bytes, MUST be valid! 00227 * 00228 * @return 00229 * The number of characters within a given string (excluding the terminating zero) 00230 */ 00231 static PLCORE_API uint32 GetNumOfCharacters(const char *pszString, uint32 nNumOfBytes); 00232 00233 /** 00234 * @brief 00235 * Returns the number of characters and bytes within a given string 00236 * 00237 * @param[in] pszString 00238 * Pointer to string to check, if a null pointer '0' will be returned 00239 * @param[out] nNumOfBytes 00240 * Receives the string size in bytes 00241 * 00242 * @return 00243 * The number of characters within a given string (excluding the terminating zero) 00244 */ 00245 static PLCORE_API uint32 GetNumOfCharactersAndBytes(const char *pszString, uint32 &nNumOfBytes); 00246 00247 /** 00248 * @brief 00249 * Returns a pointer to the first occurrence of the given wide character in the given string, or a null pointer if not found 00250 * 00251 * @param[in] pszString 00252 * Pointer to string to use, if a null pointer 'a null pointer' will be returned 00253 * @param[in] nWideCharacter 00254 * Wide character to find 00255 * @param[out] pnCharacterIndex 00256 * Receives the character index of found character returned, if not a null pointer 00257 * 00258 * @return 00259 * Pointer to the first occurrence of the given wide character in the given string, or a null pointer if not found 00260 */ 00261 static PLCORE_API const char *FindCharacter(const char *pszString, wchar_t nWideCharacter, uint32 *pnCharacterIndex); 00262 00263 /** 00264 * @brief 00265 * Same as the above, but searches a buffer of a given size instead of a zero-terminated string 00266 * 00267 * @param[in] pszString 00268 * Pointer to string to use, if a null pointer 'a null pointer' will be returned 00269 * @param[in] nNumOfBytes 00270 * String size in bytes, MUST be valid! 00271 * @param[in] nWideCharacter 00272 * Wide character to find 00273 * @param[out] pnCharacterIndex 00274 * Receives the character index of found character returned, if not a null pointer 00275 * 00276 * @return 00277 * Pointer to the first occurrence of the given character in the given string, or a null pointer if not found 00278 */ 00279 static PLCORE_API const char *FindCharacter(const char *pszString, uint32 nNumOfBytes, wchar_t nWideCharacter, uint32 *pnCharacterIndex); 00280 00281 /** 00282 * @brief 00283 * Single wide character character to UTF8 00284 * 00285 * @param[out] pszDestination 00286 * Receives the converted character, MUST be large enough to keep the result! If a null pointer '0' will be returned. 00287 * @param[in] nWideCharacter 00288 * Wide character to convert 00289 * 00290 * @return 00291 * Returns the number of bytes of the converted wide character 00292 * 00293 * @note 00294 * - Use GetNumOfCharacterBytes() to get the number of bytes a wide character requires 00295 */ 00296 static PLCORE_API uint8 FromWideCharacter(char *pszDestination, wchar_t nWideCharacter); 00297 00298 /** 00299 * @brief 00300 * Wide character string to UTF8 00301 * 00302 * @param[out] pszDestination 00303 * Receives the converted string, MUST be large enough to hold the result or a null pointer! 00304 * @param[in] nNumOfBytes 00305 * Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer! 00306 * (+1 if a terminating zero should be added) 00307 * @param[in] pszSource 00308 * Pointer to wide character string to convert, if a null pointer '0' will be returned and 'destination' not manipulated 00309 * @param[in] nSourceLength 00310 * Number of source characters, or '0' if zero-terminated 00311 * 00312 * @return 00313 * The number of used destination bytes (excluding the terminating zero) 00314 * 00315 * @note 00316 * - If 'pszDestination' is a null pointer, the the resulting length in bytes of the converted string 00317 * (excluding the terminating zero) is returned 00318 */ 00319 static PLCORE_API uint32 FromWideCharacterString(char *pszDestination, uint32 nNumOfBytes, const wchar_t *pszSource, uint32 nSourceLength); 00320 00321 /** 00322 * @brief 00323 * UTF8 to wide character string 00324 * 00325 * @param[out] pszDestination 00326 * Receives the converted string, MUST be large enough to hold the result! If a null pointer '0' will be returned. 00327 * @param[in] nLength 00328 * Length of the destination buffer, MUST be valid! (including the terminating zero) 00329 * @param[in] pszSource 00330 * Pointer to wide character string to convert, if a null pointer '0' will be returned and 'destination' not manipulated 00331 * @param[in] nSourceNumOfBytes 00332 * Number of source bytes to process, or '0' if zero-terminated 00333 * 00334 * @return 00335 * Number of converted characters 00336 * 00337 * @note 00338 * - Only works for valid UTF8, i.e. no 5- or 6-byte sequences 00339 */ 00340 static PLCORE_API uint32 ToWideCharacterString(wchar_t *pszDestination, uint32 nLength, const char *pszSource, uint32 nSourceNumOfBytes); 00341 00342 /** 00343 * @brief 00344 * Given a wide character, convert it to an ASCII escape sequence stored in the given destination buffer 00345 * 00346 * @param[out] pszDestination 00347 * Will received the converted result, MUST be large enough to hold the result or a null pointer! 00348 * @param[in] nNumOfBytes 00349 * Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer! 00350 * @param[in] nWideCharacter 00351 * Wide character to convert 00352 * 00353 * @return 00354 * Number of characters within the destination buffer (excluding the terminating zero) 00355 * 00356 * @note 00357 * - If 'pszDestination' is a null pointer, the the resulting length in bytes of the converted string 00358 * (excluding the terminating zero) is returned 00359 */ 00360 static PLCORE_API uint32 EscapeWideCharacter(char *pszDestination, uint32 nNumOfBytes, wchar_t nWideCharacter); 00361 00362 /** 00363 * @brief 00364 * Converts an UTF8 string to an ASCII string with escape sequences 00365 * 00366 * @param[out] pszDestination 00367 * Will received the converted result, MUST be large enough to hold the result or a null pointer! 00368 * @param[in] nNumOfBytes 00369 * Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer! 00370 * @param[in] pszSource 00371 * Pointer to source to read from, if a null pointer '0' will be returned and 'destination' not manipulated 00372 * @param[in] bEscapeQuotes 00373 * If 'true', quote characters will be preceded by backslashes as well 00374 * 00375 * @return 00376 * Number of characters within the destination buffer (excluding the terminating zero) 00377 * 00378 * @note 00379 * - If 'pszDestination' is a null pointer, the the resulting length in bytes of the converted string 00380 * (excluding the terminating zero) is returned 00381 */ 00382 static PLCORE_API uint32 Escape(char *pszDestination, uint32 nNumOfBytes, const char *pszSource, bool bEscapeQuotes); 00383 00384 /** 00385 * @brief 00386 * Assuming 'pszSource' points to the character after a backslash, read an escape sequence, 00387 * storing the result in 'nDestination' and returning the number of input characters processed 00388 * 00389 * @param[out] nDestination 00390 * Will receive the result 00391 * @param[in] pszSource 00392 * Pointer to source to read from, if a null pointer '0' will be returned and 'destination' not manipulated 00393 * 00394 * @return 00395 * Number of read characters (excluding the terminating zero) 00396 */ 00397 static PLCORE_API uint32 ReadEscapeSequence(wchar_t &nDestination, const char *pszSource); 00398 00399 /** 00400 * @brief 00401 * Converts a string with literal \uxxxx or \Uxxxxxxxx characters to UTF8 00402 * 00403 * @param[out] pszDestination 00404 * Will received the converted result, MUST be large enough to hold the result or a null pointer! 00405 * @param[in] nNumOfBytes 00406 * Length in bytes of the destination buffer, MUST be valid if 'pszDestination' is not a null pointer! 00407 * (+1 if a terminating zero should be added) 00408 * @param[in] pszSource 00409 * Pointer to source string to convert, if a null pointer '0' will be returned and 'destination' not manipulated 00410 * 00411 * @return 00412 * The resulting length in bytes of the converted string (excluding the terminating zero) 00413 * 00414 * @note 00415 * - If 'pszDestination' is a null pointer, the the resulting length in bytes of the converted string 00416 * (excluding the terminating zero) is returned 00417 */ 00418 static PLCORE_API uint32 Unescape(char *pszDestination, uint32 nNumOfBytes, const char *pszSource); 00419 00420 /** 00421 * @brief 00422 * Compares two UTF8 strings (case sensitive) 00423 * 00424 * @param[in] pszFirst 00425 * Pointer to first string for comparison, if a null pointer '0' will be returned 00426 * @param[in] pszSecond 00427 * Pointer to second string for comparison, if a null pointer '0' will be returned 00428 * @param[in] nCount 00429 * Amount of character to compare, can be 0 if the whole strings should be compared 00430 * 00431 * @return 00432 * -1 if the first string is smaller then the second one 00433 * 0 if both strings are equal 00434 * 1 if the first string is greater then the second one 00435 */ 00436 static PLCORE_API int Compare(const char *pszFirst, const char *pszSecond, uint32 nCount = 0); 00437 00438 /** 00439 * @brief 00440 * Find a substring in a UTF8 string 00441 * 00442 * @param[in] pszSource 00443 * Pointer to the string which should be searched through, if a null pointer 'a null pointer' will be returned 00444 * @param[in] pszSubstring 00445 * Pointer to the substring to be searched for, if a null pointer 'a null pointer' will be returned 00446 * 00447 * @return 00448 * A pointer to the start position of the substring or a null pointer if the search fails 00449 */ 00450 static PLCORE_API const char *FindSubstring(const char *pszSource, const char *pszSubstring); 00451 00452 /** 00453 * @brief 00454 * Copies the given UTF8 source string into the destination string 00455 * 00456 * @param[out] pszDestination 00457 * Will receive the copied string, if a null pointer 'a null pointer' will be returned 00458 * @param[in] pszSource 00459 * Pointer to the source string, if a null pointer 'a null pointer' will be returned 00460 * @param[in] nCount 00461 * Amount of character to copy, can be 0 if the whole strings should be copied 00462 * 00463 * @return 00464 * A pointer to 'szDestination' (always valid!) 00465 * 00466 * @note 00467 * - If 'nCount' is 0 or the source string is < 'nCount' then the '\0'-terminator is also copied 00468 */ 00469 static PLCORE_API char *Copy(char *pszDestination, const char *pszSource, uint32 nCount = 0); 00470 00471 00472 }; 00473 00474 00475 //[-------------------------------------------------------] 00476 //[ Namespace ] 00477 //[-------------------------------------------------------] 00478 } // PLCore 00479 00480 00481 //[-------------------------------------------------------] 00482 //[ Implementation ] 00483 //[-------------------------------------------------------] 00484 #include "PLCore/String/UTF8Tools.inl" 00485 00486 00487 #endif // __PLCORE_UTF8TOOLS_H__
|