1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""
interface for different indexing engines for the translate toolkit

"""
27
28 __revision__ = "$Id: __init__.py 12562 2009-10-22 11:04:27Z alaaosh $"
29
30 import CommonIndexer
31 import os
32 import shutil
33 import logging
34
""" TODO for indexing engines:
    * get rid of jToolkit.glock dependency
    * add partial matching at the beginning of a term
    * do a proper cleanup - e.g.: the pylucene lockfiles remain in /tmp/
    * do unittests for PyLucene v1.x
"""
41
def _get_available_indexers():
    """get a list of the available supported indexing engines

    Every readable "*.py" file in the directory of this module (except this
    module itself) is imported. A module is only considered if it offers a
    callable "is_available" that returns a true value. From such a module
    all classes derived from (but not identical to)
    CommonIndexer.CommonDatabase are collected.

    @return: the indexer classes found, in the order of the sorted file names
    @rtype: list of subclasses of CommonIndexer.CommonDatabase
    """
    result = []
    # the engine modules are expected in the directory of this file
    this_file = os.path.abspath(__file__)
    indexer_dir = os.path.dirname(this_file)
    # "__file__" is a path (and may point to a compiled ".pyc" file), thus
    # it never equals a bare directory entry - compare module names instead
    own_name = os.path.splitext(os.path.basename(this_file))[0]
    # sort the files by name - this makes the order of the result (and thus
    # the default choice of the engine) reproducible
    for mod_file in sorted(os.listdir(indexer_dir)):
        mod_path = os.path.join(indexer_dir, mod_file)
        if not (mod_file.endswith(".py") and os.path.isfile(mod_path)
                and os.access(mod_path, os.R_OK)):
            # wrong extension / not a file / not readable -> skip it
            continue
        # strip the ".py" suffix
        mod_name = mod_file[:-3]
        if mod_name == own_name:
            # we should not import ourself
            continue
        try:
            # NOTE(review): the sibling modules are found via the implicit
            # relative import of Python 2 - confirm before porting
            module = __import__(mod_name, globals(), {})
        except ImportError:
            # the module (or one of its dependencies) cannot be imported
            continue
        # the module function "is_available" must return a true value
        is_available = getattr(module, "is_available", None)
        if not (callable(is_available) and is_available()):
            continue
        for item in dir(module):
            try:
                element = getattr(module, item)
            except TypeError:
                continue
            try:
                # the class must inherit CommonDatabase (without being the
                # same)
                if issubclass(element, CommonIndexer.CommonDatabase) \
                        and element is not CommonIndexer.CommonDatabase:
                    result.append(element)
            except TypeError:
                # "issubclass" refuses anything that is not a class
                continue
    return result
94
96 """sort a given list of indexer classes according to the given order
97
98 the list of preferred indexers are strings that should match the filenames
99 (without suppix ".py") of the respective modules (e.g.: XapianIndexer or
100 PyLuceneIndexer)
101
102 @param indexer_classes: the list of all available indexer classes
103 @type indexer_classes: list of CommonIndexer.CommonDatabase objects
104 @param pref_order: list of preferred indexer names
105 @type pref_order: str
106 @return: sorted list of indexer classes
107 @rtype: list of CommonIndexer.CommonDatabase objects
108 """
109
110 get_indexer_name = lambda indexer_class: \
111 os.path.basename(indexer_class.__module__).split(".")[-1]
112
113 avail_indexers = indexer_classes[:]
114 result = []
115
116 for choice in pref_order:
117
118 matches = [ indexer for indexer in avail_indexers
119 if get_indexer_name(indexer) == choice ]
120
121 for match_item in matches:
122 result.append(match_item)
123 avail_indexers.remove(match_item)
124
125 return result + avail_indexers
126
127
128
# the usable indexing engines are detected once, when the module is loaded
_AVAILABLE_INDEXERS = _get_available_indexers()

# True, if at least one indexing engine is available
HAVE_INDEXER = bool(_AVAILABLE_INDEXERS)
134
135
def get_indexer(basedir, preference=None):
    """return an appropriate indexer for the given directory

    If the directory already exists, then we check, if one of the available
    indexers knows how to handle it. Otherwise we return the first available
    indexer.

    @raise IndexError: there is no indexing engine available
    @raise ValueError: the database location already exists, but we did not
        find a suitable indexing engine for it
    @raise OSError: any error that could occur while creating or opening the
        database

    @param basedir: the parent directory of (possible) different indexing
        databases
    @type basedir: string
    @param preference: names of the preferred indexing engines (see
        _sort_indexers_by_preference); None means "no preference"
    @type preference: list of str
    @return: the database object of the most appropriate indexing engine
    @rtype: instance of a subclass of L{CommonIndexer.CommonDatabase}
    """
    # NOTE(review): the "def" line was missing in the reviewed listing - the
    # name and the default of "preference" are reconstructed; confirm them
    # against the callers
    if not _AVAILABLE_INDEXERS:
        raise IndexError("Indexer: no indexing engines are available")
    if preference is None:
        preference = []
    # sort the available indexers by preference
    preferred_indexers = _sort_indexers_by_preference(_AVAILABLE_INDEXERS,
                                                      preference)
    if os.path.exists(basedir):
        for index_class in preferred_indexers:
            try:
                # the first engine that accepts the location wins
                # (presumably "create_allowed=False" keeps an engine from
                # creating a new database here - verify in CommonIndexer)
                return index_class(basedir, create_allowed=False)
            except (ValueError, OSError):
                # this engine cannot handle the location - try the next one
                continue
        # none of the engines accepted the existing location: remove it
        # (announce the removal before it happens)
        logging.info("Deleting invalid indexing directory '%s'", basedir)
        shutil.rmtree(basedir, ignore_errors=True)
    # the location does not exist (anymore): use the most preferred engine
    return preferred_indexers[0](basedir)
179
180
if __name__ == "__main__":
    # show all supported indexing engines (with fulfilled requirements)
    for ONE_INDEX in _AVAILABLE_INDEXERS:
        # the call form gives the same output with the "print" statement of
        # Python 2 and with the "print" function of Python 3
        print(ONE_INDEX)
185