ICU 54.1  54.1
translit.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1999-2014, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 * Date Name Description
7 * 11/17/99 aliu Creation.
8 **********************************************************************
9 */
10 #ifndef TRANSLIT_H
11 #define TRANSLIT_H
12 
13 #include "unicode/utypes.h"
14 
20 #if !UCONFIG_NO_TRANSLITERATION
21 
22 #include "unicode/uobject.h"
23 #include "unicode/unistr.h"
24 #include "unicode/parseerr.h"
25 #include "unicode/utrans.h" // UTransPosition, UTransDirection
26 #include "unicode/strenum.h"
27 
29 
30 class UnicodeFilter;
31 class UnicodeSet;
32 class CompoundTransliterator;
33 class TransliteratorParser;
34 class NormalizationTransliterator;
35 class TransliteratorIDParser;
36 
242 
243 private:
244 
248  UnicodeString ID;
249 
256  UnicodeFilter* filter;
257 
258  int32_t maximumContextLength;
259 
260  public:
261 
267  union Token { // A context integer or pointer for a factory function, passed by value.
272  int32_t integer; // This token, interpreted as a 32-bit integer.
277  void* pointer; // This token, interpreted as a native pointer.
278  };
279 
280 #ifndef U_HIDE_INTERNAL_API
281 
286  inline static Token integerToken(int32_t);
287 
293  inline static Token pointerToken(void*);
294 #endif /* U_HIDE_INTERNAL_API */
295 
311  typedef Transliterator* (U_EXPORT2 *Factory)(const UnicodeString& ID, Token context); // Pointer to a function that creates and returns a Transliterator; it is given an ID and a by-value Token context.
312 
313 protected:
314 
324  Transliterator(const UnicodeString& ID, UnicodeFilter* adoptedFilter);
325 
331 
336  Transliterator& operator=(const Transliterator&);
337 
349  static Transliterator* createBasicInstance(const UnicodeString& id,
350  const UnicodeString* canon);
351 
352  friend class TransliteratorParser; // for parseID()
353  friend class TransliteratorIDParser; // for createBasicInstance()
354  friend class TransliteratorAlias; // for setID()
355 
356 public:
357 
362  virtual ~Transliterator();
363 
378  virtual Transliterator* clone() const;
379 
395  virtual int32_t transliterate(Replaceable& text,
396  int32_t start, int32_t limit) const;
397 
403  virtual void transliterate(Replaceable& text) const;
404 
469  virtual void transliterate(Replaceable& text, UTransPosition& index,
470  const UnicodeString& insertion,
471  UErrorCode& status) const;
472 
488  virtual void transliterate(Replaceable& text, UTransPosition& index,
489  UChar32 insertion,
490  UErrorCode& status) const;
491 
505  virtual void transliterate(Replaceable& text, UTransPosition& index,
506  UErrorCode& status) const;
507 
519  virtual void finishTransliteration(Replaceable& text,
520  UTransPosition& index) const;
521 
522 private:
523 
539  void _transliterate(Replaceable& text,
540  UTransPosition& index,
541  const UnicodeString* insertion,
542  UErrorCode &status) const;
543 
544 protected:
545 
625  virtual void handleTransliterate(Replaceable& text,
626  UTransPosition& pos,
627  UBool incremental) const = 0;
628 
629 public:
641  virtual void filteredTransliterate(Replaceable& text,
642  UTransPosition& index,
643  UBool incremental) const;
644 
645 private:
646 
674  virtual void filteredTransliterate(Replaceable& text,
675  UTransPosition& index,
676  UBool incremental,
677  UBool rollback) const;
678 
679 public:
680 
694  int32_t getMaximumContextLength(void) const;
695 
696 protected:
697 
704  void setMaximumContextLength(int32_t maxContextLength);
705 
706 public:
707 
718  virtual const UnicodeString& getID(void) const;
719 
729  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
730  UnicodeString& result);
731 
753  static UnicodeString& U_EXPORT2 getDisplayName(const UnicodeString& ID,
754  const Locale& inLocale,
755  UnicodeString& result);
756 
764  const UnicodeFilter* getFilter(void) const;
765 
775  UnicodeFilter* orphanFilter(void);
776 
787  void adoptFilter(UnicodeFilter* adoptedFilter);
788 
808  Transliterator* createInverse(UErrorCode& status) const;
809 
826  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
827  UTransDirection dir,
828  UParseError& parseError,
829  UErrorCode& status);
830 
841  static Transliterator* U_EXPORT2 createInstance(const UnicodeString& ID,
842  UTransDirection dir,
843  UErrorCode& status);
844 
860  static Transliterator* U_EXPORT2 createFromRules(const UnicodeString& ID,
861  const UnicodeString& rules,
862  UTransDirection dir,
863  UParseError& parseError,
864  UErrorCode& status);
865 
877  virtual UnicodeString& toRules(UnicodeString& result,
878  UBool escapeUnprintable) const;
879 
892  int32_t countElements() const;
893 
913  const Transliterator& getElement(int32_t index, UErrorCode& ec) const;
914 
930  UnicodeSet& getSourceSet(UnicodeSet& result) const;
931 
946  virtual void handleGetSourceSet(UnicodeSet& result) const;
947 
961  virtual UnicodeSet& getTargetSet(UnicodeSet& result) const;
962 
963 public:
964 
981  static void U_EXPORT2 registerFactory(const UnicodeString& id,
982  Factory factory,
983  Token context);
984 
1006  static void U_EXPORT2 registerInstance(Transliterator* adoptedObj);
1007 
1022  static void U_EXPORT2 registerAlias(const UnicodeString& aliasID,
1023  const UnicodeString& realID);
1024 
1025 protected:
1026 
1027 #ifndef U_HIDE_INTERNAL_API
1028 
1037  static void _registerFactory(const UnicodeString& id,
1038  Factory factory,
1039  Token context);
1040 
1044  static void _registerInstance(Transliterator* adoptedObj);
1045 
1049  static void _registerAlias(const UnicodeString& aliasID, const UnicodeString& realID);
1050 
1084  static void _registerSpecialInverse(const UnicodeString& target,
1085  const UnicodeString& inverseTarget,
1086  UBool bidirectional);
1087 #endif /* U_HIDE_INTERNAL_API */
1088 
1089 public:
1090 
1108  static void U_EXPORT2 unregister(const UnicodeString& ID);
1109 
1110 public:
1111 
1121  static StringEnumeration* U_EXPORT2 getAvailableIDs(UErrorCode& ec);
1122 
1128  static int32_t U_EXPORT2 countAvailableSources(void);
1129 
1139  static UnicodeString& U_EXPORT2 getAvailableSource(int32_t index,
1140  UnicodeString& result);
1141 
1150  static int32_t U_EXPORT2 countAvailableTargets(const UnicodeString& source);
1151 
1163  static UnicodeString& U_EXPORT2 getAvailableTarget(int32_t index,
1164  const UnicodeString& source,
1165  UnicodeString& result);
1166 
1174  static int32_t U_EXPORT2 countAvailableVariants(const UnicodeString& source,
1175  const UnicodeString& target);
1176 
1190  static UnicodeString& U_EXPORT2 getAvailableVariant(int32_t index,
1191  const UnicodeString& source,
1192  const UnicodeString& target,
1193  UnicodeString& result);
1194 
1195 protected:
1196 
1197 #ifndef U_HIDE_INTERNAL_API
1198 
1202  static int32_t _countAvailableSources(void);
1203 
1208  static UnicodeString& _getAvailableSource(int32_t index,
1209  UnicodeString& result);
1210 
1215  static int32_t _countAvailableTargets(const UnicodeString& source);
1216 
1221  static UnicodeString& _getAvailableTarget(int32_t index,
1222  const UnicodeString& source,
1223  UnicodeString& result);
1224 
1229  static int32_t _countAvailableVariants(const UnicodeString& source,
1230  const UnicodeString& target);
1231 
1236  static UnicodeString& _getAvailableVariant(int32_t index,
1237  const UnicodeString& source,
1238  const UnicodeString& target,
1239  UnicodeString& result);
1240 #endif /* U_HIDE_INTERNAL_API */
1241 
1242 protected:
1243 
1250  void setID(const UnicodeString& id);
1251 
1252 public:
1253 
1264  static UClassID U_EXPORT2 getStaticClassID(void);
1265 
1281  virtual UClassID getDynamicClassID(void) const = 0; // Pure virtual: ICU4C "poor man's RTTI" hook returning a UClassID for the actual ICU class, without relying on compiler RTTI.
1282 
1283 private:
1284  static UBool initializeRegistry(UErrorCode &status);
1285 
1286 public:
1287 #ifndef U_HIDE_OBSOLETE_API
1288 
1295  static int32_t U_EXPORT2 countAvailableIDs(void);
1296 
1309  static const UnicodeString& U_EXPORT2 getAvailableID(int32_t index);
1310 #endif /* U_HIDE_OBSOLETE_API */
1311 };
1312 
1313 inline int32_t Transliterator::getMaximumContextLength(void) const { // Inline read accessor for the private maximumContextLength member.
1314  return maximumContextLength; // Returned as stored; no validation or computation happens here.
1315 }
1316 
1317 inline void Transliterator::setID(const UnicodeString& id) { // Replaces this object's ID member with `id`, then NUL-terminates the stored string.
1318  ID = id; // Assign first; the two calls below operate on the member, not on the caller's argument.
1319  // NUL-terminate the ID string, which is a non-aliased copy.
1320  ID.append((UChar)0); // Append a single U+0000 code unit after the ID text.
1321  ID.truncate(ID.length()-1); // Drop the logical length back by one so length() excludes the NUL; presumably the NUL stays in the underlying buffer - TODO confirm against UnicodeString::truncate.
1322 }
1323 
1324 #ifndef U_HIDE_INTERNAL_API
1325 inline Transliterator::Token Transliterator::integerToken(int32_t i) { // Wraps a 32-bit integer in a Token union and returns it by value.
1326  Token t; // Declared without an initializer; only the member assigned below is written.
1327  t.integer = i; // Store the argument in the union's integer member.
1328  return t;
1329 }
1330 
1331 inline Transliterator::Token Transliterator::pointerToken(void* p) { // Wraps a native pointer in a Token union and returns it by value.
1332  Token t; // Declared without an initializer; only the member assigned below is written.
1333  t.pointer = p; // Store the argument in the union's pointer member; the pointer is copied, not dereferenced.
1334  return t;
1335 }
1336 #endif /* U_HIDE_INTERNAL_API */
1337 
1339 
1340 #endif /* #if !UCONFIG_NO_TRANSLITERATION */
1341 
1342 #endif
virtual UClassID getDynamicClassID() const
ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
Position structure for utrans_transIncremental() incremental transliteration.
Definition: utrans.h:120
C++ API: Unicode String.
Transliterator *(* Factory)(const UnicodeString &ID, Token context)
A function that creates and returns a Transliterator.
Definition: translit.h:311
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:91
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:55
Transliterator is an abstract class that transliterates text from one format to another.
Definition: translit.h:241
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside...
Definition: utypes.h:358
A context integer or pointer for a factory function, passed by value.
Definition: translit.h:267
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:71
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
UBool truncate(int32_t targetLength)
Truncate this UnicodeString to the targetLength.
Definition: unistr.h:4445
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Uni...
Definition: unifilt.h:59
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
C API: Transliterator.
UnicodeString & append(const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString o...
Definition: unistr.h:4337
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:276
C++ API: Common ICU base class UObject.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
C API: Parse Error Information.
int32_t integer
This token, interpreted as a 32-bit integer.
Definition: translit.h:272
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
C++ API: String Enumeration.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:56
Basic definitions for ICU, for both C and C++ APIs.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:245
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:221
UTransDirection
Direction constant indicating the direction in a transliterator, e.g., the forward or reverse rules o...
Definition: utrans.h:78
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
void * pointer
This token, interpreted as a native pointer.
Definition: translit.h:277
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:185