1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 """Manage the OmegaT glossary format
22
23 OmegaT glossary format is used by the
24 U{OmegaT<http://www.omegat.org/en/omegat.html>} computer-aided
25 translation tool.
26
27 It is a bilingual format derived from the base classes, with L{OmegaTFile}
28 and L{OmegaTUnit} providing file- and unit-level access.
29
30 Format Implementation
31 =====================
32 The OmegaT glossary format is a simple Tab Separated Value (TSV) file
33 with the columns: source, target, comment.
34
35 The dialect of the TSV files is specified by L{OmegaTDialect}.
36
37 Encoding
38 --------
39 The files are either UTF-8 or encoded using the system default. UTF-8
40 encoded files use the .utf8 extension while system encoded files use
41 the .tab extension.
42 """
43
44 import csv
45 import locale
46 import os.path
47 import sys
48 import time
49 from translate.storage import base
50
51 OMEGAT_FIELDNAMES = ["source", "target", "comment"]
52 """Field names for an OmegaT glossary unit"""
53
54
67 csv.register_dialect("omegat", OmegaTDialect)
68
70 """An OmegaT glossary unit"""
76
78 """Get the dictionary of values for an OmegaT line"""
79 return self._dict
80
82 """Set the dictionary of values for an OmegaT line
83
84 @param newdict: a new dictionary with OmegaT line elements
85 @type newdict: Dict
86 """
87
88 self._dict = newdict
89 dict = property(getdict, setdict)
90
92 if key not in self._dict:
93 return None
94 elif self._dict[key]:
95 return self._dict[key].decode('utf-8')
96 else:
97 return ""
98
100 if newvalue is None:
101 self._dict[key] = None
102 if isinstance(newvalue, unicode):
103 newvalue = newvalue.encode('utf-8')
104 if not key in self._dict or newvalue != self._dict[key]:
105 self._dict[key] = newvalue
106
109
112
114 self._rich_source = None
115 return self._set_field('source', newsource)
116 source = property(getsource, setsource)
117
120
122 self._rich_target = None
123 return self._set_field('target', newtarget)
124 target = property(gettarget, settarget)
125
127 self._dict['target-lang'] = newlang
128 targetlang = property(None, settargetlang)
129
131 return str(self._dict)
132
134 return bool(self._dict.get('target', None))
135
136
138 """An OmegaT glossary file"""
139 Name = _("OmegaT Glossary")
140 Mimetypes = ["application/x-omegat-glossary"]
141 Extensions = ["utf8"]
143 """Construct an OmegaT glossary, optionally reading in from inputfile."""
144 self.UnitClass = unitclass
145 base.TranslationStore.__init__(self, unitclass=unitclass)
146 self.filename = ''
147 self.extension = ''
148 self._encoding = self._get_encoding()
149 if inputfile is not None:
150 self.parse(inputfile)
151
154
156 """Parse the given file or file source string"""
157 if hasattr(input, 'name'):
158 self.filename = input.name
159 elif not getattr(self, 'filename', ''):
160 self.filename = ''
161 if hasattr(input, "read"):
162 tmsrc = input.read()
163 input.close()
164 input = tmsrc
165 try:
166 input = input.decode(self._encoding).encode('utf-8')
167 except:
168 raise ValueError("OmegaT files are either UTF-8 encoded or use the default system encoding")
169 lines = csv.DictReader(input.split("\n"), fieldnames=OMEGAT_FIELDNAMES, dialect="omegat")
170 for line in lines:
171 newunit = OmegaTUnit()
172 newunit.dict = line
173 self.addunit(newunit)
174
176 output = csv.StringIO()
177 writer = csv.DictWriter(output, fieldnames=OMEGAT_FIELDNAMES, dialect="omegat")
178 unit_count = 0
179 for unit in self.units:
180 if unit.istranslated():
181 unit_count += 1
182 writer.writerow(unit.dict)
183 if unit_count == 0:
184 return ""
185 output.reset()
186 decoded = "".join(output.readlines()).decode('utf-8')
187 try:
188 return decoded.encode(self._encoding)
189 except UnicodeEncodeError:
190 return decoded.encode('utf-8')
191
193 """An OmegaT glossary file in the default system encoding"""
194
195
196 Name = None
197 Mimetypes = ["application/x-omegat-glossary"]
198 Extensions = ["tab"]
199
201 return locale.getdefaultlocale()[1]
202