-----------------------------------------------------------------------
--               GtkAda - Ada95 binding for Gtk+/Gnome               --
--                                                                   --
--                   Copyright (C) 2003 ACT-Europe                   --
--                                                                   --
-- This library is free software; you can redistribute it and/or     --
-- modify it under the terms of the GNU General Public               --
-- License as published by the Free Software Foundation; either      --
-- version 2 of the License, or (at your option) any later version.  --
--                                                                   --
-- This library is distributed in the hope that it will be useful,   --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of    --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU --
-- General Public License for more details.                          --
--                                                                   --
-- You should have received a copy of the GNU General Public         --
-- License along with this library; if not, write to the             --
-- Free Software Foundation, Inc., 59 Temple Place - Suite 330,      --
-- Boston, MA 02111-1307, USA.                                       --
--                                                                   --
-- As a special exception, if other files instantiate generics from  --
-- this unit, or you link this unit with other files to produce an   --
-- executable, this  unit  does not  by itself cause  the resulting  --
-- executable to be covered by the GNU General Public License. This  --
-- exception does not however invalidate any other reasons why the   --
-- executable file  might be covered by the  GNU Public License.     --
-----------------------------------------------------------------------

--  <description>
--
--  This package provides functions for handling Unicode characters and
--  UTF-8 strings. See also Glib.Convert.
--
--  </description>
--  <c_version>2.2.1</c_version>
--  <group>Glib, the general-purpose library</group>

  38. with Interfaces.C.Strings; 
  39.  
  40. package Glib.Unicode is 
  41.    pragma Preelaborate; 
  42.  
  43.    package ICS renames Interfaces.C.Strings; 
  44.  
  45.    procedure UTF8_Validate 
  46.      (Str         : UTF8_String; 
  47.       Valid       : out Boolean; 
  48.       Invalid_Pos : out Natural); 
  49.    --  Validate a UTF8 string. 
  50.    --  Set Valid to True if valid, set Invalid_Pos to first invalid byte. 
  51.  
  52.    ----------------------- 
  53.    -- Character classes -- 
  54.    ----------------------- 
  55.  
  56.    type G_Unicode_Type is 
  57.      (Unicode_Control, 
  58.       Unicode_Format, 
  59.       Unicode_Unassigned, 
  60.       Unicode_Private_Use, 
  61.       Unicode_Surrogate, 
  62.       Unicode_Lowercase_Letter, 
  63.       Unicode_Modifier_Letter, 
  64.       Unicode_Other_Letter, 
  65.       Unicode_Titlecase_Letter, 
  66.       Unicode_Uppercase_Letter, 
  67.       Unicode_Combining_Mark, 
  68.       Unicode_Enclosing_Mark, 
  69.       Unicode_Non_Spacing_Mark, 
  70.       Unicode_Decimal_Number, 
  71.       Unicode_Letter_Number, 
  72.       Unicode_Other_Number, 
  73.       Unicode_Connect_Punctuation, 
  74.       Unicode_Dash_Punctuation, 
  75.       Unicode_Close_Punctuation, 
  76.       Unicode_Final_Punctuation, 
  77.       Unicode_Initial_Punctuation, 
  78.       Unicode_Other_Punctuation, 
  79.       Unicode_Open_Punctuation, 
  80.       Unicode_Currency_Symbol, 
  81.       Unicode_Modifier_Symbol, 
  82.       Unicode_Math_Symbol, 
  83.       Unicode_Other_Symbol, 
  84.       Unicode_Line_Separator, 
  85.       Unicode_Paragraph_Separator, 
  86.       Unicode_Space_Separator); 
  87.    --  The possible character classifications. 
  88.    --  See http://www.unicode.org/Public/UNIDATA/UCD.html 
  89.  
  90.    function Is_Space (Char : Gunichar) return Boolean; 
  91.    --  True if Char is a space character 
  92.  
  93.    function Is_Alnum (Char : Gunichar) return Boolean; 
  94.    --  True if Char is an alphabetical or numerical character 
  95.  
  96.    function Is_Alpha (Char : Gunichar) return Boolean; 
  97.    --  True if Char is an alphabetical character 
  98.  
  99.    function Is_Digit (Char : Gunichar) return Boolean; 
  100.    --  True if Char is a digit 
  101.  
  102.    function Is_Lower (Char : Gunichar) return Boolean; 
  103.    --  True if Char is a lower-case character 
  104.  
  105.    function Is_Upper (Char : Gunichar) return Boolean; 
  106.    --  True if Char is an upper-case character 
  107.  
  108.    function Is_Punct (Char : Gunichar) return Boolean; 
  109.    --  True if Char is a punctuation character 
  110.  
  111.    function Unichar_Type (Char : Gunichar) return G_Unicode_Type; 
  112.    --  Return the unicode character type of a given character 
  113.  
  114.    ------------------- 
  115.    -- Case handling -- 
  116.    ------------------- 
  117.  
  118.    function To_Lower (Char : Gunichar) return Gunichar; 
  119.    --  Convert Char to lower cases 
  120.  
  121.    function To_Upper (Char : Gunichar) return Gunichar; 
  122.    --  Convert Char to upper cases 
  123.  
  124.    function UTF8_Strdown 
  125.      (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr; 
  126.    pragma Import (C, UTF8_Strdown, "g_utf8_strdown"); 
  127.    --  Convert all characters in Str to lowercase. The resulting string 
  128.    --  must be freed by the user. It can have a different length than 
  129.    --  Str. 
  130.  
  131.    function UTF8_Strdown (Str : UTF8_String) return UTF8_String; 
  132.    --  Convert Str to lower cases 
  133.  
  134.    function UTF8_Strup 
  135.      (Str : ICS.chars_ptr; Len : Integer) return ICS.chars_ptr; 
  136.    pragma Import (C, UTF8_Strup, "g_utf8_strup"); 
  137.    --  Convert all characters in Str to uppercase. The resulting string is 
  138.    --  newly allocated, and can have a different length than Str (for 
  139.    --  instance, the german ess-zet is converted to SS). 
  140.    --  The returned string must be freed by the caller. 
  141.  
  142.    function UTF8_Strup (Str : UTF8_String) return UTF8_String; 
  143.    --  Convert Str to upper cases 
  144.  
  145.    --------------------------- 
  146.    --  Manipulating strings -- 
  147.    --------------------------- 
  148.  
  149.    function UTF8_Strlen 
  150.      (Str : ICS.chars_ptr; Max : Integer := -1) return Glong; 
  151.    pragma Import (C, UTF8_Strlen, "g_utf8_strlen"); 
  152.    --  Return the length of a utf8-encoded string. 
  153.    --  Max is the maximal number of bytes to examine. If it is negative, then 
  154.    --  the string is assumed to be nul-terminated. 
  155.  
  156.    function UTF8_Strlen (Str : UTF8_String) return Glong; 
  157.    --  Return the number of characters in Str 
  158.  
  159.    function UTF8_Find_Next_Char 
  160.      (Str     : ICS.chars_ptr; 
  161.       Str_End : ICS.chars_ptr := ICS.Null_Ptr) return ICS.chars_ptr; 
  162.    pragma Import (C, UTF8_Find_Next_Char, "g_utf8_find_next_char"); 
  163.    --  Find the start of the next UTF8 character after Str. 
  164.    --  Str_End points to the end of the string. If Null_Ptr, the string must 
  165.    --  be nul-terminated 
  166.  
  167.    function UTF8_Find_Next_Char 
  168.      (Str : UTF8_String; Index : Natural) return Natural; 
  169.    pragma Inline (UTF8_Find_Next_Char); 
  170.    --  Find the start of the next UTF8 character after the Index-th byte. 
  171.    --  Index doesn't need to be on the start of a character. 
  172.    --  Index is set to a value greater than Str'Last if there is no more 
  173.    --  character. 
  174.  
  175.    function UTF8_Next_Char 
  176.      (Str : UTF8_String; Index : Natural) return Natural; 
  177.    pragma Inline (UTF8_Next_Char); 
  178.    --  Find the start of the next UTF8 character after the Index-th byte. 
  179.    --  Index has to be on the start of a character. 
  180.    --  Index is set to a value greater than Str'Last if there is no more 
  181.    --  character. 
  182.  
  183.    function UTF8_Find_Prev_Char 
  184.      (Str_Start : ICS.chars_ptr; Str : ICS.chars_ptr) return ICS.chars_ptr; 
  185.    pragma Import (C, UTF8_Find_Prev_Char, "g_utf8_find_prev_char"); 
  186.    --  Find the start of the previous UTF8 character before Str. 
  187.    --  Str_Start is a pointer to the beginning of the string. 
  188.    --  Null_Ptr is returned if there is no previous character 
  189.  
  190.    function UTF8_Find_Prev_Char 
  191.      (Str : UTF8_String; Index : Natural) return Natural; 
  192.    --  Find the start of the previous UTF8 character after the Index-th byte. 
  193.    --  Index doesn't need to be on the start of a character. 
  194.    --  Index is set to a value smaller than Str'First if there is no 
  195.    --  previous character. 
  196.  
  197.    ----------------- 
  198.    -- Conversions -- 
  199.    ----------------- 
  200.  
  201.    function Unichar_To_UTF8 
  202.      (C : Gunichar; Buffer : ICS.chars_ptr := ICS.Null_Ptr) return Natural; 
  203.    pragma Import (C, Unichar_To_UTF8, "g_unichar_to_utf8"); 
  204.    --  Encode C into Buffer, which must have at least 6 bytes free. 
  205.    --  Return the number of bytes written in Buffer. 
  206.    --  If Buffer is Null_Ptr, then the only effect is to compute the number of 
  207.    --  bytes to encode C. 
  208.  
  209.    procedure Unichar_To_UTF8 
  210.      (C      : Gunichar; 
  211.       Buffer : out UTF8_String; 
  212.       Last   : out Natural); 
  213.    --  Encode C into Buffer. Buffer must have at least 6 bytes free. 
  214.    --  Return the index of the last byte written in Buffer. 
  215.  
  216.    function UTF8_Get_Char (Str : UTF8_String) return Gunichar; 
  217.    --  Converts a sequence of bytes encoded as UTF8 to a unicode character. 
  218.    --  If Str doesn't point to a valid UTF8 encoded character, the result is 
  219.    --  undefined. 
  220.  
  221.    function UTF8_Get_Char_Validated (Str : UTF8_String) return Gunichar; 
  222.    --  Same as above. However, if the sequence if an incomplete start of a 
  223.    --  possibly valid character, it returns -2. If the sequence is invalid, 
  224.    --  returns -1. 
  225.  
  226.    --  ??? Gunichar is unsigned, how can we test -2 or -1 ? 
  227.  
  228. private 
  229.    pragma Convention (C, G_Unicode_Type); 
  230.    pragma Import (C, To_Upper, "g_unichar_toupper"); 
  231.    pragma Import (C, To_Lower, "g_unichar_tolower"); 
  232.    pragma Import (C, Unichar_Type, "g_unichar_type"); 
  233. end Glib.Unicode;