Main Page   Class Hierarchy   Compound List   File List   Header Files   Sources   Compound Members   File Members  

normlzr.h

This is the verbatim text of the normlzr.h include file.
/*
 ********************************************************************
 * COPYRIGHT: 
 * Copyright (c) 1996-1999, International Business Machines Corporation and
 * others. All Rights Reserved.
 ********************************************************************
 */

#ifndef NORMLZR_H
#define NORMLZR_H

#include "unicode/utypes.h"
#include "unicode/unistr.h"
#include "unicode/chariter.h"

/* forward declaration */
class ComposedCharIter;

// Normalizer: declares an iterator-style interface (current/first/last/next/
// previous/setIndex) over a piece of input text, plus static one-shot
// normalize/compose/decompose helpers that work on whole UnicodeStrings.
// The mode (EMode) and option bits chosen at construction can be changed
// later through setMode()/setOption().
//
// NOTE(review): the class declares a copy constructor and a destructor and
// holds a raw CharacterIterator* member ("text"), but no copy-assignment
// operator is declared here, so the compiler-generated member-wise one is
// what callers get. Confirm in the implementation whether "text" is owned,
// and if so whether operator= should be declared.
class U_COMMON_API Normalizer
{

 public:
  // This tells us what the bits in the "mode" mean.
  // The EMode values further down are built by OR-ing these flags together.
  enum {
    COMPAT_BIT         = 1,
    DECOMP_BIT         = 2,
    COMPOSE_BIT     = 4
  };



  // Sentinel value. Presumably what the iteration methods return once the
  // text is exhausted -- confirm against the implementation.
  enum {
      DONE=0xffff
  };

  // Normalization modes. Each value is a combination of the *_BIT flags
  // declared above; NO_OP has no bit set.
  enum EMode {

    // No mode bits set.
    NO_OP         = 0,
    
    // COMPOSE_BIT only.
    COMPOSE         = COMPOSE_BIT,
    
    // COMPOSE_BIT together with COMPAT_BIT.
    COMPOSE_COMPAT     = COMPOSE_BIT | COMPAT_BIT,
    
    // DECOMP_BIT only.
    DECOMP         = DECOMP_BIT,
    
    // DECOMP_BIT together with COMPAT_BIT.
    DECOMP_COMPAT     = DECOMP_BIT | COMPAT_BIT
  };

  // Option flags. Presumably these are the values accepted by the
  // "opt"/"option"/"options" parameters below -- confirm.
  enum {

    // Judging by the name, asks the normalizer to leave Hangul alone;
    // the exact effect is defined in the implementation.
    IGNORE_HANGUL     = 0x001
  };

  // Constructors
  //
  // The input text can be supplied in three forms -- a UnicodeString, a
  // UChar pointer plus length, or a CharacterIterator -- and each form comes
  // with and without an extra int32_t options argument.
  
  // Text from a UnicodeString, with a mode.
  Normalizer(const UnicodeString& str, 
         EMode mode);
    
  // Text from a UnicodeString, with a mode and an options word.
  Normalizer(const UnicodeString& str, 
         EMode mode, 
         int32_t opt);
  
  // Text from a UChar pointer and a length, with a mode.
  Normalizer(const UChar* str,
         int32_t length,
         EMode mode);

  // Text from a UChar pointer and a length, with a mode and an options word.
  Normalizer(const UChar* str,
         int32_t length,
         EMode mode,
                 int32_t option);

  // Text from a CharacterIterator (taken by const reference), with a mode.
  Normalizer(const CharacterIterator& iter, 
         EMode mode);
  
  // Text from a CharacterIterator (taken by const reference), with a mode
  // and an options word.
  Normalizer(const CharacterIterator& iter, 
         EMode mode, 
         int32_t opt);
  
  // Copy constructor.
  Normalizer(const Normalizer& copy);
  
  // Destructor (non-virtual).
  ~Normalizer();
  
  
  //-------------------------------------------------------------------------
  // Static utility methods
  //
  // Each one reads "source" (const reference) and takes "result" and
  // "status" by non-const reference, so those two are the outputs.
  // normalize() is driven by an EMode; compose() and decompose() instead
  // take a UBool "compat" flag. All three take an int32_t options word.
  //-------------------------------------------------------------------------
  
  static void normalize(const UnicodeString& source, 
            EMode mode, 
            int32_t options,
            UnicodeString& result, 
            UErrorCode &status);
  
  static void compose(const UnicodeString& source, 
              UBool compat,
              int32_t options,
              UnicodeString& result, 
              UErrorCode &status);
  
  static void decompose(const UnicodeString& source, 
            UBool compat,
            int32_t options,
            UnicodeString& result, 
            UErrorCode &status);


  //-------------------------------------------------------------------------
  // CharacterIterator overrides
  //
  // NOTE(review): as declared here Normalizer has no base class and none of
  // these functions is virtual, so they are not overrides in the C++ sense;
  // they only carry CharacterIterator-like names. The character accessors
  // return UChar32, while the private helpers further down return UChar.
  //-------------------------------------------------------------------------
  
  // Const accessor returning a UChar32; does not take part in moving the
  // position as far as the signature shows.
  UChar32              current(void) const;
  
  // The following four are non-const and return a UChar32. Presumably they
  // move to the first / last / next / previous position and return the
  // character found there (or DONE) -- confirm.
  UChar32              first(void);

  UChar32              last(void);
  
  UChar32              next(void);
  
  UChar32              previous(void);
  
  // Non-const; takes a UTextOffset and returns a UChar32.
  UChar32              setIndex(UTextOffset index);
  
  // Non-const, no arguments, no return value.
  void                reset(void);
  
  // Const accessors returning UTextOffset values.
  UTextOffset            getIndex(void) const;
  
  UTextOffset            startIndex(void) const;
  
  UTextOffset            endIndex(void) const;
  
  
  // Comparison is only offered against another Normalizer; the
  // CharacterIterator-based virtual version is left commented out.
  //  virtual UBool    operator==(const CharacterIterator& that) const;
  UBool        operator==(const Normalizer& that) const;
  // Defined inline after the class as the negation of operator==.
  inline UBool        operator!=(const Normalizer& that) const;
  
  // Returns a pointer to a Normalizer. Presumably a newly allocated copy
  // that the caller must delete -- confirm.
  Normalizer*        clone(void) const;
  
  // Const; returns an int32_t hash value.
  int32_t                hashCode(void) const;

  //-------------------------------------------------------------------------
  // Property access methods
  //-------------------------------------------------------------------------
  
  // Setter/getter pair for the EMode.
  void setMode(EMode newMode);
  
  EMode getMode(void) const;
  
  // Setter/getter pair for a single option, addressed by an int32_t
  // (presumably one of the option flags such as IGNORE_HANGUL -- confirm).
  void setOption(int32_t option, 
         UBool value);
  
  UBool getOption(int32_t option) const;
  
  // setText overloads: same three input forms as the constructors. Each
  // takes a UErrorCode by non-const reference.
  void setText(const UnicodeString& newText, 
           UErrorCode &status);
  
  void setText(const CharacterIterator& newText, 
           UErrorCode &status);
  
  void setText(const UChar* newText,
                    int32_t length,
            UErrorCode &status);
  // Writes into the caller's UnicodeString. Note: declared non-const.
  void            getText(UnicodeString&  result);
  
  // Returns a const UChar pointer and writes to "count" (presumably the
  // number of UChars available through the pointer -- confirm).
  // Note: declared non-const.
  const UChar*     getText(int32_t&  count);

private:
  // Private utility methods for iteration
  // For documentation, see the source code
  // (Unlike the public iteration methods, these return UChar, not UChar32.)
  UChar nextCompose(void);
  UChar prevCompose(void);
  UChar nextDecomp(void);
  UChar prevDecomp(void);
  
  UChar curForward(void);
  UChar curBackward(void);
  
  // Takes a non-const CharacterIterator pointer plus the same mode/option
  // pair the public constructors accept; presumably the common
  // initialisation path for all of them -- confirm.
  void    init(CharacterIterator* iter, 
         EMode mode, 
         int32_t option);
  void    initBuffer(void);
  void    clearBuffer(void);
  
  // Utilities used by Compose
  static void        bubbleAppend(UnicodeString& target, 
                     UChar ch, 
                     uint32_t cclass);
  static uint32_t     getComposeClass(UChar ch);
  static uint16_t    composeLookup(UChar ch);
  static uint16_t    composeAction(uint16_t baseIndex, 
                      uint16_t comIndex);
  static void        explode(UnicodeString& target, 
                uint16_t index);
  static UChar    pairExplode(UnicodeString& target, 
                    uint16_t action);
  
  // Utilities used by Decompose
  static void        fixCanonical(UnicodeString& result);    // Reorders combining marks
  static uint8_t    getClass(UChar ch);                    // Gets char's combining class
  
  // Other static utility methods
  // doAppend/doInsert take a UChar array and a uint16_t offset into it and
  // write to "dest"; doInsert additionally takes a position in "dest".
  static void doAppend(const UChar source[], 
               uint16_t offset, 
               UnicodeString& dest);
  static void doInsert(const UChar source[], 
               uint16_t offset, 
               UnicodeString& dest, 
               UTextOffset pos);
  
  // Hangul/Jamo helpers; they use the HANGUL_* / JAMO_* constants declared
  // at the bottom of the class (presumably -- confirm in the implementation).
  static void hangulToJamo(UChar ch, 
               UnicodeString& result, 
               uint16_t decompLimit);
  static void jamoAppend(UChar ch, 
             uint16_t decompLimit, 
             UnicodeString& dest);
  static void jamoToHangul(UnicodeString& buffer, 
               UTextOffset start);
  
  //-------------------------------------------------------------------------
  // Private data
  //-------------------------------------------------------------------------
  
  // Current mode and option bits (see setMode()/setOption()).
  EMode         fMode;
  int32_t       fOptions;
  // NOTE(review): signed 16-bit here, whereas the helper parameters named
  // "decompLimit" are uint16_t -- confirm the mix is intentional.
  int16_t    minDecomp;
  
  // The input text and our position in it
  CharacterIterator*  text;
  
  // A buffer for holding intermediate results
  UnicodeString       buffer;
  UTextOffset          bufferPos;
  UTextOffset          bufferLimit;
  UChar             currentChar;
  
  // Another buffer for use during iterative composition
  UnicodeString       explodeBuf;
  
  // STR_LENGTH_MASK (0x0003) selects the low two bits, which is exactly the
  // width skipped by STR_INDEX_SHIFT (2); presumably a packed value holding
  // an index in the high bits and a length in the low two -- confirm.
  enum {
    EMPTY = -1,
    STR_INDEX_SHIFT = 2, //Must agree with the constants used in NormalizerBuilder
    STR_LENGTH_MASK = 0x0003
  };

  // Hangul syllable / Jamo constants.
  // JAMO_NCOUNT is derived as JAMO_VCOUNT * JAMO_TCOUNT (21 * 28 = 588), and
  // HANGUL_LIMIT equals HANGUL_BASE + JAMO_LCOUNT * JAMO_NCOUNT
  // (0xac00 + 19 * 588 = 0xd7a4), i.e. one past the last value in the range.
  enum {
    HANGUL_BASE = 0xac00,
    HANGUL_LIMIT = 0xd7a4,
    JAMO_LBASE = 0x1100,
    JAMO_VBASE = 0x1161,
    JAMO_TBASE = 0x11a7,
    JAMO_LCOUNT = 19,
    JAMO_VCOUNT = 21,
    JAMO_TCOUNT = 28,
    JAMO_NCOUNT = JAMO_VCOUNT * JAMO_TCOUNT
  };
  
  // ComposedCharIter (forward-declared above the class) gets access to the
  // private members.
  friend class ComposedCharIter;
};

// Inequality is defined purely in terms of equality: two Normalizers are
// unequal exactly when operator== reports that they are not equal.
inline UBool
Normalizer::operator!= (const Normalizer& other) const
{
    const UBool isEqual = operator==(other);
    return !isEqual;
}

#endif // NORMLZR_H

Generated at Mon Jun 5 12:53:19 2000 for ICU1.5 by doxygen 1.0.0 written by Dimitri van Heesch, © 1997-1999