00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef RBBI_H
00015 #define RBBI_H
00016
00017 #include "unicode/utypes.h"
00018
00024 #if !UCONFIG_NO_BREAK_ITERATION
00025
00026 #include "unicode/brkiter.h"
00027 #include "unicode/udata.h"
00028 #include "unicode/parseerr.h"
00029 #include "unicode/schriter.h"
00030 #include "unicode/uchriter.h"
00031
00032
// Forward declarations. Apart from the BreakIterator base class, this header
// only uses these types through pointers, so incomplete types are sufficient
// here.
// UTrie is declared at global scope, before U_NAMESPACE_BEGIN; it is not
// referenced anywhere else in this header.
00033 struct UTrie;
00034
00035 U_NAMESPACE_BEGIN
00036
// RBBIDataHeader               - parameter of the protected internal constructors.
// RuleBasedBreakIteratorTables - not referenced elsewhere in this header.
// BreakIterator                - also the base class of RuleBasedBreakIterator, which
//                                needs the complete type (presumably supplied by
//                                unicode/brkiter.h, included above -- confirm).
// RBBIDataWrapper              - type of the fData member.
// UStack                       - type of the fLanguageBreakEngines member.
// LanguageBreakEngine          - return type of getLanguageBreakEngine().
// UnhandledEngine              - type of the fUnhandledBreakEngine member.
// RBBIStateTable               - parameter of handleNext() / handlePrevious().
00038 struct RBBIDataHeader;
00039 class RuleBasedBreakIteratorTables;
00040 class BreakIterator;
00041 class RBBIDataWrapper;
00042 class UStack;
00043 class LanguageBreakEngine;
00044 class UnhandledEngine;
00045 struct RBBIStateTable;
00046
00047
00048
00049
/**
 * RuleBasedBreakIterator: a BreakIterator subclass exported with U_COMMON_API.
 *
 * As declared here, an instance can be built from rule source text
 * (UnicodeString), from precompiled binary rules (byte buffer + length), or
 * from a UDataMemory image. It exposes text attachment (setText / adoptText),
 * boundary navigation (first / last / next / previous / following /
 * preceding / isBoundary / current), rule-status queries, and cloning.
 *
 * NOTE(review): this class is exported and has data members and many virtual
 * functions; reordering members or virtuals may change object/vtable layout
 * for existing clients -- confirm before restructuring.
 */
00065 class U_COMMON_API RuleBasedBreakIterator : public BreakIterator {
00066
00067 protected:
      // UText handle owned or referenced by the iterator; presumably the text
      // currently being analyzed (see setText(UText*, ...) / getUText()) -- confirm.
00072 UText *fText;
00073
      // CharacterIterator view of the text; presumably what getText() returns
      // and what adoptText() stores -- confirm against the implementation.
00079 CharacterIterator *fCharIter;
00080
      // StringCharacterIterator pointer; its exact role is not visible in this
      // header (presumably backs setText(const UnicodeString&)) -- TODO confirm.
00086 StringCharacterIterator *fSCharIter;
00087
      // UCharCharacterIterator pointer; its exact role is not visible in this
      // header -- TODO confirm.
00093 UCharCharacterIterator *fDCharIter;
00094
      // Wrapper around the rule data this iterator runs on (presumably the
      // compiled state tables) -- confirm.
00099 RBBIDataWrapper *fData;
00100
      // Index associated with the rule status of the most recent boundary;
      // presumably an index into a status table consulted by getRuleStatus().
00104 int32_t fLastRuleStatusIndex;
00105
      // Flag paired with fLastRuleStatusIndex; presumably TRUE when that index is
      // up to date (see makeRuleStatusValid()) -- confirm.
00112 UBool fLastStatusIndexValid;
00113
      // Unsigned counter; by its name, a count of dictionary characters seen --
      // how and when it is updated is not visible here.
00119 uint32_t fDictionaryCharCount;
00120
      // Pointer to an int32_t array of cached break positions; ownership and
      // lifetime are not visible in this header.
00128 int32_t* fCachedBreakPositions;
00129
      // Presumably the number of entries in fCachedBreakPositions.
00134 int32_t fNumCachedBreakPositions;
00135
      // Presumably the current index into fCachedBreakPositions.
00141 int32_t fPositionInCache;
00142
      // Stack of language break engines; presumably the engines located so far
      // through getLanguageBreakEngine() -- confirm.
00150 UStack *fLanguageBreakEngines;
00151
      // Engine object for characters no language break engine handles
      // (inferred from the type name) -- confirm.
00159 UnhandledEngine *fUnhandledBreakEngine;
00160
      // Integer break-type code; set through setBreakType(). The meaning of the
      // individual values is not defined in this header.
00166 int32_t fBreakType;
00167
00168 protected:
00169
00170
00171
00172
      // Everything up to the matching #endif is omitted when U_HIDE_INTERNAL_API
      // is defined.
00173 #ifndef U_HIDE_INTERNAL_API
00174
      // Single-valued tag enum; used only as a parameter type to select the
      // second constructor overload below.
00182 enum EDontAdopt {
00183 kDontAdopt
00184 };
00185
      // Internal constructor from an RBBIDataHeader. The non-const pointer, in
      // contrast to the kDontAdopt overload, suggests ownership of `data` is
      // taken -- TODO confirm. `status` is the ICU in/out error code.
00196 RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
00197
      // Internal constructor from a const RBBIDataHeader, selected by passing
      // kDontAdopt; presumably leaves ownership of `data` with the caller.
00206 RuleBasedBreakIterator(const RBBIDataHeader* data, enum EDontAdopt dontAdopt, UErrorCode &status);
00207 #endif
00208
00209
      // These two classes are granted access to the protected and private
      // members declared in this class.
00210 friend class RBBIRuleBuilder;
00212 friend class BreakIterator;
00213
00214
00215
00216 public:
00217
      // Default constructor; the resulting state is defined by the implementation.
00222 RuleBasedBreakIterator();
00223
      // Copy constructor.
00230 RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
00231
      // Constructs an iterator from rule source text.
      //   rules      - the rule description.
      //   parseError - out-parameter; presumably filled in when the rules fail
      //                to parse -- confirm.
      //   status     - ICU in/out error code.
00240 RuleBasedBreakIterator( const UnicodeString &rules,
00241 UParseError &parseError,
00242 UErrorCode &status);
00243
      // Constructs an iterator from precompiled binary rules.
      //   compiledRules - pointer to the binary rule data (see getBinaryRules()).
      //   ruleLength    - length of that data, presumably in bytes.
      //   status        - ICU in/out error code.
      // NOTE(review): whether the buffer is copied or must outlive the iterator
      // is not visible here -- confirm before relying on either.
00267 RuleBasedBreakIterator(const uint8_t *compiledRules,
00268 uint32_t ruleLength,
00269 UErrorCode &status);
00270
      // Constructs an iterator from a UDataMemory image; ownership of `image` is
      // not expressed by the signature -- confirm against the implementation.
00283 RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
00284
      // Virtual destructor.
00289 virtual ~RuleBasedBreakIterator();
00290
      // Copy assignment; returns a reference to a RuleBasedBreakIterator
      // (conventionally *this).
00298 RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
00299
      // Virtual equality comparison against any BreakIterator.
00308 virtual UBool operator==(const BreakIterator& that) const;
00309
      // Non-virtual inequality; defined inline after the class as the negation
      // of operator==.
00317 UBool operator!=(const BreakIterator& that) const;
00318
      // Returns a BreakIterator pointer to a copy of this object; presumably
      // heap-allocated and owned by the caller -- confirm.
00329 virtual BreakIterator* clone() const;
00330
      // Returns an integer hash code for this iterator.
00336 virtual int32_t hashCode(void) const;
00337
      // Returns a const reference to the rule text as a UnicodeString.
00343 virtual const UnicodeString& getRules(void) const;
00344
00345
00346
00347
00348
      // Returns a reference to a CharacterIterator for the text; presumably the
      // one held in fCharIter -- confirm.
00374 virtual CharacterIterator& getText(void) const;
00375
00376
      // Returns a UText for the text. `fillIn` is a caller-supplied UText that
      // may be reused (exact null-handling not visible here); `status` is the
      // ICU in/out error code.
00391 virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
00392
      // Sets the text from a CharacterIterator; by its name the iterator takes
      // ownership of `newText` -- confirm.
00400 virtual void adoptText(CharacterIterator* newText);
00401
      // Sets the text to analyze from a UnicodeString.
00408 virtual void setText(const UnicodeString& newText);
00409
      // Sets the text to analyze from a UText; `status` is the ICU in/out error code.
00423 virtual void setText(UText *text, UErrorCode &status);
00424
      // Navigation functions. Each returns an int32_t, presumably a text offset
      // (or a "done" sentinel defined by BreakIterator) -- confirm.
      // first(): move to the first boundary.
00430 virtual int32_t first(void);
00431
      // last(): move to the last boundary.
00437 virtual int32_t last(void);
00438
      // next(n): move by n boundaries; the sign convention for n is not visible here.
00449 virtual int32_t next(int32_t n);
00450
      // next(): move to the following boundary.
00456 virtual int32_t next(void);
00457
      // previous(): move to the preceding boundary.
00463 virtual int32_t previous(void);
00464
      // following(offset): presumably the first boundary after `offset`.
00472 virtual int32_t following(int32_t offset);
00473
      // preceding(offset): presumably the last boundary before `offset`.
00481 virtual int32_t preceding(int32_t offset);
00482
      // isBoundary(offset): reports whether `offset` is a boundary. Note it is
      // non-const, so it may move the iterator -- confirm.
00491 virtual UBool isBoundary(int32_t offset);
00492
      // current(): returns the current position without modifying the iterator (const).
00498 virtual int32_t current(void) const;
00499
00500
      // Returns an integer status value for the most recent boundary; presumably
      // derived from fLastRuleStatusIndex -- confirm.
00533 virtual int32_t getRuleStatus() const;
00534
      // Writes rule status values into `fillInVec`, which holds `capacity`
      // entries. The meaning of the return value (presumably the number of
      // values available) and overflow behavior are not visible here.
00558 virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
00559
      // Returns the class ID of this object's dynamic type.
00571 virtual UClassID getDynamicClassID(void) const;
00572
      // Returns the class ID for RuleBasedBreakIterator itself (static).
00584 static UClassID U_EXPORT2 getStaticClassID(void);
00585
00586
00587
00588
00589
00590
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600
00601
00602
00603
00604
00605
00606
00607
00608
00609
      // Creates a clone, given a caller-supplied buffer.
      //   stackBuffer - caller-provided memory, presumably used for the clone
      //                 when large enough -- confirm.
      //   BufferSize  - in/out by reference; presumably the buffer size on input.
      //   status      - ICU in/out error code.
00610 virtual BreakIterator * createBufferClone(void *stackBuffer,
00611 int32_t &BufferSize,
00612 UErrorCode &status);
00613
00614
      // Returns a pointer to the binary (compiled) rules and stores their length
      // in `length`. The lifetime of the returned buffer is not visible here
      // (presumably owned by this iterator) -- confirm. Non-const member.
00632 virtual const uint8_t *getBinaryRules(uint32_t &length);
00633
      // Re-points the iterator at `input` and returns a reference to a
      // RuleBasedBreakIterator (presumably *this). The preconditions on `input`
      // are not visible in this header -- confirm before use.
00659 virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status);
00660
00661
00662 protected:
00663
00664
00665
      // Resets internal state; exactly which members are reset is defined by the
      // implementation.
00671 virtual void reset(void);
00672
      // The two declarations below are compiled out by `#if 0`; they are not
      // part of the class as built.
00673 #if 0
00674
00682 virtual UBool isDictionaryChar(UChar32);
00683
00688 virtual int32_t getBreakType() const;
00689 #endif
00690
      // Sets the break type code (see fBreakType).
00695 virtual void setBreakType(int32_t type);
00696
      // Internal API: omitted when U_HIDE_INTERNAL_API is defined.
00697 #ifndef U_HIDE_INTERNAL_API
00698
      // Shared initialization helper; presumably called from the constructors.
00703 void init();
00704 #endif
00705
00706 private:
00707
      // Runs the given state table to find a boundary in the backward direction;
      // returns an int32_t, presumably the boundary position -- confirm.
00717 int32_t handlePrevious(const RBBIStateTable *statetable);
00718
      // Forward-direction counterpart of handlePrevious().
00728 int32_t handleNext(const RBBIStateTable *statetable);
00729
00730 protected:
00731
      // Internal API: omitted when U_HIDE_INTERNAL_API is defined.
00732 #ifndef U_HIDE_INTERNAL_API
00733
      // Dictionary check over the range [startPos, endPos) or similar -- the
      // exact range convention is not visible here. `reverse` selects direction.
      // Returns an int32_t, presumably a (possibly adjusted) break position.
00747 int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
00748 #endif
00749
00750 private:
00751
      // Returns the language break engine for code point `c`; presumably null
      // when none applies, and presumably cached in fLanguageBreakEngines.
00758 const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
00759
      // Presumably brings fLastRuleStatusIndex up to date and sets
      // fLastStatusIndexValid -- confirm.
00763 void makeRuleStatusValid();
00764
00765 };
00766
00767
00768
00769
00770
00771
00772
// Inequality is the logical negation of operator==. The call goes through
// this->operator==, which is declared virtual, so a subclass override of
// operator== is what gets negated here.
00773 inline UBool RuleBasedBreakIterator::operator!=(const BreakIterator& that) const {
00774 return static_cast<UBool>(!this->operator==(that));
00775 }
00776
00777 U_NAMESPACE_END
00778
00779 #endif /* !UCONFIG_NO_BREAK_ITERATION */
00780
00781 #endif /* RBBI_H */