Package translate :: Package storage :: Package xml_extract :: Module extract
[hide private]
[frames] | no frames]

Source Code for Module translate.storage.xml_extract.extract

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  # 
  4  # Copyright 2008-2009 Zuza Software Foundation 
  5  # 
  6  # This file is part of the Translate Toolkit. 
  7  # 
  8  # This program is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  # 
 13  # This program is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with this program; if not, see <http://www.gnu.org/licenses/>. 
 20   
 21  from lxml import etree 
 22   
 23  from translate.storage import base 
 24  from translate.misc.typecheck import accepts, Self, IsCallable, IsOneOf, Any, Class 
 25  from translate.misc.typecheck.typeclasses import Number 
 26  from translate.misc.contextlib import contextmanager, nested 
 27  from translate.misc.context import with_ 
 28  from translate.storage.xml_extract import xpath_breadcrumb 
 29  from translate.storage.xml_extract import misc 
 30  from translate.storage.placeables import xliff, StringElem 
def Nullable(t):
    """Build a type-check spec that accepts either ``t`` or ``None``.

    The result is ``IsOneOf(t, NoneType)`` and is meant to be used inside
    the ``@accepts`` signatures of this module.
    """
    none_type = type(None)
    return IsOneOf(t, none_type)
# Type-check placeholder built from the string 'Translatable'. It is used in
# the signature of Translatable.__init__, where the class name itself is not
# bound yet.
TranslatableClass = Class('Translatable')


class Translatable(object):
    """A translatable element located in the DOM tree.

    ``source`` is a list whose items are either unicode strings or other
    Translatable objects; the nested Translatable objects are this node's
    placeables.
    """

    @accepts(Self(), unicode, unicode, etree._Element, [IsOneOf(TranslatableClass, unicode)])
    def __init__(self, placeable_name, xpath, dom_node, source):
        # Where the node comes from.
        self.dom_node = dom_node
        self.xpath = xpath
        self.placeable_name = placeable_name
        # Mixed list of text chunks and nested Translatable objects.
        self.source = source
        # Starts out as not inline; callers overwrite this after construction.
        self.is_inline = False

    def _get_placeables(self):
        """Return the Translatable items of ``source``, in source order."""
        nested_translatables = []
        for item in self.source:
            if isinstance(item, Translatable):
                nested_translatables.append(item)
        return nested_translatables

    placeables = property(_get_placeables)
52
@accepts(IsCallable(), Translatable, state=[Any()])
def reduce_unit_tree(f, unit_node, *state):
    """Reduce the tree rooted at ``unit_node`` with ``f`` via misc.reduce_tree.

    ``unit_node`` is handed to ``misc.reduce_tree`` as both its second and
    third positional argument, the children of a node are its
    ``placeables``, and any extra ``state`` values are forwarded unchanged.
    """
    def placeables_of(node):
        return node.placeables

    return misc.reduce_tree(f, unit_node, unit_node, placeables_of, *state)
56
class ParseState(object):
    """Maintain constants and variables used during the walking of a
    DOM tree (via the function find_translatable_dom_nodes).

    Attributes:
      - no_translate_content_elements: container of (namespace, tag) pairs
        whose own content is not turned into a translatable; only their
        children are searched.
      - inline_elements: container of (namespace, tag) pairs that are
        flagged as inline while they are being processed.
      - is_inline: whether the element currently being processed is inline.
      - xpath_breadcrumb: tracker for the XPath of the current element.
      - placeable_name: tag name of the element currently being processed.
      - nsmap: mapping handed to compact_tag to shorten namespaces.
    """

    def __init__(self, no_translate_content_elements, inline_elements=None, nsmap=None):
        # Fresh dictionaries per instance: a literal {} in the signature is
        # created once and would be shared by every ParseState that relies
        # on the default.
        if inline_elements is None:
            inline_elements = {}
        if nsmap is None:
            nsmap = {}
        self.no_translate_content_elements = no_translate_content_elements
        self.inline_elements = inline_elements
        self.is_inline = False
        self.xpath_breadcrumb = xpath_breadcrumb.XPathBreadcrumb()
        self.placeable_name = u"<top-level>"
        self.nsmap = nsmap
@accepts(etree._Element, ParseState)
def _process_placeable(dom_node, state):
    """Return the single Translatable that represents ``dom_node``.

    find_translatable_dom_nodes is run on ``dom_node``. When it finds
    nothing (no recognized child tags), an empty Translatable named
    u"placeable" is created at the current breadcrumb XPath. More than one
    result is treated as an internal error and raises Exception.
    """
    found = find_translatable_dom_nodes(dom_node, state)
    count = len(found)
    # The ideal situation: exactly one translatable came back.
    if count == 1:
        return found[0]
    if count > 1:
        raise Exception("BUG: find_translatable_dom_nodes should never return more than a single translatable")
    # Nothing was recognized below this node; stand in with an empty placeable.
    return Translatable(u"placeable", state.xpath_breadcrumb.xpath, dom_node, [])
@accepts(etree._Element, ParseState)
def _process_placeables(dom_node, state):
    """Return one list alternating placeables and tail text.

    For every child of ``dom_node`` the list receives the Translatable
    produced by _process_placeable followed by the child's tail text as
    unicode (an empty string when there is no tail). The list is used to
    build the source of the enclosing translatable.
    """
    pieces = []
    for child in dom_node:
        pieces.append(_process_placeable(child, state))
        pieces.append(unicode(child.tail or u""))
    return pieces
@accepts(etree._Element, ParseState)
def _process_translatable(dom_node, state):
    """Wrap ``dom_node`` in a Translatable and return it in a one-item list.

    The source starts with the node's leading text (empty string when
    absent) and continues with the placeable/tail sequence of its children.
    The name, XPath and inline flag are taken from ``state``.
    """
    leading_text = unicode(dom_node.text or u"")
    source = [leading_text]
    source.extend(_process_placeables(dom_node, state))
    node = Translatable(state.placeable_name, state.xpath_breadcrumb.xpath, dom_node, source)
    node.is_inline = state.is_inline
    return [node]
@accepts(etree._Element, ParseState)
def _process_children(dom_node, state):
    """Collect the translatables found below ``dom_node``.

    The results of find_translatable_dom_nodes for every child are
    concatenated. Zero or one result is returned as is; several results are
    grouped under one intermediate Translatable named after the tag of
    ``dom_node``.
    """
    _namespace, tag = misc.parse_tag(dom_node.tag)
    found = []
    for child in dom_node:
        # Each call yields a list; splice its items into the flat result.
        found.extend(find_translatable_dom_nodes(child, state))
    if len(found) <= 1:
        return found
    return [Translatable(tag, state.xpath_breadcrumb.xpath, dom_node, found)]
114
def compact_tag(nsmap, namespace, tag):
    """Return a short textual form of a namespaced tag.

    When ``namespace`` is a key of ``nsmap`` the result is
    ``<nsmap[namespace]>:<tag>``; otherwise the namespace is kept in full
    as ``{<namespace>}<tag>``.
    """
    if namespace not in nsmap:
        return u'{%s}%s' % (namespace, tag)
    prefix = nsmap[namespace]
    return u'%s:%s' % (prefix, tag)
@accepts(etree._Element, ParseState)
def find_translatable_dom_nodes(dom_node, state):
    """Return the list of Translatable objects found at or below ``dom_node``.

    While the node is processed, ``state`` is updated (breadcrumb XPath,
    placeable name and inline flag) and restored afterwards, including when
    processing raises an exception.

    If the (namespace, tag) pair of the node is not listed in
    ``state.no_translate_content_elements`` the node itself becomes a
    translatable; otherwise only its children are searched.

    Nodes that are not elements, and processing instructions, produce an
    empty list.
    """
    # For now, we only want to deal with XML elements.
    # And we want to avoid processing instructions, which
    # are XML elements (in the inheritance hierarchy).
    if not isinstance(dom_node, etree._Element) or \
       isinstance(dom_node, etree._ProcessingInstruction):
        return []

    namespace, tag = misc.parse_tag(dom_node.tag)

    @contextmanager
    def xpath_set():
        # Push this element on the breadcrumb for the duration of the block.
        state.xpath_breadcrumb.start_tag(compact_tag(state.nsmap, namespace, tag))
        try:
            yield state.xpath_breadcrumb
        finally:
            # Pop even on error, so the breadcrumb stays balanced.
            state.xpath_breadcrumb.end_tag()

    @contextmanager
    def placeable_set():
        # Temporarily name placeables after the current tag.
        old_placeable_name = state.placeable_name
        state.placeable_name = tag
        try:
            yield state.placeable_name
        finally:
            state.placeable_name = old_placeable_name

    @contextmanager
    def inline_set():
        # Temporarily record whether the current element is an inline one.
        old_inline = state.is_inline
        state.is_inline = (namespace, tag) in state.inline_elements
        try:
            yield state.is_inline
        finally:
            state.is_inline = old_inline

    def with_block(xpath_breadcrumb, placeable_name, is_inline):
        # The three parameters take the values yielded by the context
        # managers above; the body reads everything from 'state' instead.
        if (namespace, tag) not in state.no_translate_content_elements:
            return _process_translatable(dom_node, state)
        else:
            return _process_children(dom_node, state)

    return with_(nested(xpath_set(), placeable_set(), inline_set()), with_block)
class IdMaker(object):
    """Hand out consecutive integer ids, starting at 0, to hashable objects.

    An object keeps the id it received the first time it was seen.
    """

    def __init__(self):
        # Maps each known object to its id.
        self._obj_id_map = {}
        # The id that the next unseen object will receive.
        self._max_id = 0

    def get_id(self, obj):
        """Return the id of ``obj``, assigning the next free one if needed."""
        if self.has_id(obj):
            return self._obj_id_map[obj]
        assigned = self._max_id
        self._obj_id_map[obj] = assigned
        self._max_id = assigned + 1
        return assigned

    def has_id(self, obj):
        """Tell whether ``obj`` has already been given an id."""
        return obj in self._obj_id_map
175
@accepts(Nullable(Translatable), Translatable, IdMaker)
def _to_placeables(parent_translatable, translatable, id_maker):
    """Convert the source of ``translatable`` into text and XLIFF placeables.

    Unicode chunks are copied as they are. A nested Translatable becomes an
    ``xliff.G`` wrapping its own converted source when it is inline, and an
    ``xliff.X`` carrying its XPath as ``xid`` otherwise. Ids come from
    ``id_maker``; ``parent_translatable`` is only passed along to the
    recursive calls.
    """
    placeables = []
    for chunk in translatable.source:
        if isinstance(chunk, unicode):
            placeables.append(chunk)
            continue
        # The id is requested before recursing, so an inline element gets
        # its id ahead of anything nested inside it.
        chunk_id = unicode(id_maker.get_id(chunk))
        if not chunk.is_inline:
            placeables.append(xliff.X(id=chunk_id, xid=chunk.xpath))
            continue
        inner = _to_placeables(parent_translatable, chunk, id_maker)
        placeables.append(xliff.G(sub=inner, id=chunk_id))
    return placeables
@accepts(base.TranslationStore, Nullable(Translatable), Translatable, IdMaker)
def _add_translatable_to_store(store, parent_translatable, translatable, id_maker):
    """Create a unit for ``translatable`` and append it to ``store``.

    The unit is built with ``store.UnitClass``, its rich source is a single
    StringElem holding the converted placeables, and its location is the
    XPath of the translatable.
    """
    unit = store.UnitClass(u'')
    rich_element = StringElem(_to_placeables(parent_translatable, translatable, id_maker))
    unit.rich_source = [rich_element]
    unit.addlocation(translatable.xpath)
    store.addunit(unit)
199
@accepts(Translatable)
def _contains_translatable_text(translatable):
    """Tell whether ``translatable`` directly holds non-whitespace text.

    Only the unicode chunks of ``translatable.source`` are inspected;
    nested Translatable chunks are skipped. If every text chunk is empty or
    whitespace, there is nothing to translate and False is returned.
    """
    for chunk in translatable.source:
        if not isinstance(chunk, unicode):
            continue
        if chunk.strip() == u"":
            continue
        return True
    return False
@accepts(base.TranslationStore)
def _make_store_adder(store):
    """Return a callback that adds a unit to ``store`` for a Translatable.

    The callback takes ``(parent_translatable, translatable, rid)`` and
    forwards the first two to _add_translatable_to_store; ``rid`` is
    accepted but not used. Every call of the returned callback shares one
    IdMaker, so placeable ids are unique across the whole store.
    """
    shared_ids = IdMaker()

    def add_to_store(parent_translatable, translatable, rid):
        _add_translatable_to_store(store, parent_translatable, translatable, shared_ids)

    return add_to_store
@accepts([Translatable], IsCallable(), Nullable(Translatable), Number)
def _walk_translatable_tree(translatables, f, parent_translatable, rid):
    """Call ``f`` for every non-inline translatable that holds real text.

    The tree is walked depth first. ``f`` receives
    ``(parent_translatable, translatable, rid)``, where the parent is the
    closest enclosing translatable for which ``f`` was called (or the value
    passed in by the caller). ``rid`` is incremented before each call of
    ``f``; increments carry over to later siblings and are passed down to
    children, but increments made inside a recursive call do not flow back
    to the caller.
    """
    for translatable in translatables:
        is_unit = _contains_translatable_text(translatable) and not translatable.is_inline
        parent_for_children = parent_translatable
        if is_unit:
            rid = rid + 1
            parent_for_children = translatable
            f(parent_translatable, translatable, rid)
        _walk_translatable_tree(translatable.placeables, f, parent_for_children, rid)
235
def reverse_map(a_map):
    """Return a new dict mapping every value of ``a_map`` back to its key.

    Values must be hashable. When several keys share one value, only one of
    them survives (the one iterated last).
    """
    # items() rather than iteritems(): it is available on Python 2 and 3
    # and on any mapping type, not only on Python 2 dicts.
    return dict((value, key) for key, value in a_map.items())
@accepts(lambda obj: hasattr(obj, "read"), base.TranslationStore, ParseState, Nullable(IsCallable()))
def build_store(odf_file, store, parse_state, store_adder = None):
    """Parse ``odf_file`` and feed its translatable content to ``store``.

    ``odf_file`` must be a file-like object (it needs a ``read``
    attribute). ``parse_state.nsmap`` is overwritten with the reversed
    namespace map of the document root. For every translatable found,
    ``store_adder`` is called; when none is given, one that adds units to
    ``store`` is created. The parsed lxml tree is returned.
    """
    if not store_adder:
        store_adder = _make_store_adder(store)
    document = etree.parse(odf_file)
    root_element = document.getroot()
    # The root's nsmap is reversed so compact_tag can look entries up by
    # namespace.
    parse_state.nsmap = reverse_map(root_element.nsmap)
    found = find_translatable_dom_nodes(root_element, parse_state)
    _walk_translatable_tree(found, store_adder, None, 0)
    return document
249