Package translate :: Package search :: Package indexing
[hide private]
[frames] | no frames]

Source Code for Package translate.search.indexing

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  # Copyright 2008 Zuza Software Foundation 
  4  #  
  5  # This file is part of translate. 
  6  # 
  7  # translate is free software; you can redistribute it and/or modify 
  8  # it under the terms of the GNU General Public License as published by 
  9  # the Free Software Foundation; either version 2 of the License, or 
 10  # (at your option) any later version. 
 11  #  
 12  # translate is distributed in the hope that it will be useful, 
 13  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 14  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 15  # GNU General Public License for more details. 
 16  # 
 17  # You should have received a copy of the GNU General Public License 
 18  # along with translate; if not, write to the Free Software 
 19  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 20  # 
 21   
 22   
 23  """ 
 24  interface for different indexing engines for the translate toolkit 
 25   
 26  """ 
 27   
 28  __revision__ = "$Id: __init__.py 12562 2009-10-22 11:04:27Z alaaosh $" 
 29   
 30  import CommonIndexer 
 31  import os 
 32  import shutil 
 33  import logging 
 34   
 35  """ TODO for indexing engines: 
 36      * get rid of jToolkit.glock dependency 
 37      * add partial matching at the beginning of a term 
 38      * do a proper cleanup - e.g.: the pylucene lockfiles remain in /tmp/ 
 39      * do unittests for PyLucene v1.x 
 40      """ 
 41   
def _get_available_indexers():
    """get a list of the available supported indexing engines

    search through the directory of this package for modules that contain
    classes derived from the CommonIndexer.CommonDatabase class

    A module is only taken into account if it can be imported and if its
    module level function "is_available" returns a true value.

    @return: the usable indexer classes, ordered by the name of their module
        file
    @rtype: list of subclasses of L{CommonIndexer.CommonDatabase}
    """
    result = []
    # get the package directory
    indexer_dir = os.path.dirname(os.path.abspath(__file__))
    # The name of this module without directory and extension: "__file__" is
    # a path (and may end with ".pyc" or ".pyo"), while "os.listdir" returns
    # bare file names - thus they can not be compared directly.
    own_name = os.path.splitext(os.path.basename(__file__))[0]
    # sort the files in the directory by name - to make it determinable,
    # which indexing engine is chosen in case of multiple possibilities
    for mod_file in sorted(os.listdir(indexer_dir)):
        # split off the ".py" suffix
        mod_name, extension = os.path.splitext(mod_file)
        if extension != ".py":
            # wrong extension (this includes a file that is just called
            # ".py", which would result in an empty module name) -> skip it
            continue
        if mod_name == own_name:
            # we should not import ourself
            continue
        mod_path = os.path.join(indexer_dir, mod_file)
        if not os.path.isfile(mod_path) or not os.access(mod_path, os.R_OK):
            # no file / not readable -> skip it
            continue
        # TODO - debug: "[Indexer]: trying to import indexing engines from '%s'" % mod_path
        try:
            module = __import__(mod_name, globals(), {})
        except ImportError:
            # maybe it is unusable or dependencies are missing
            continue
        # the module function "is_available" must return "True"
        is_available = getattr(module, "is_available", None)
        if not (callable(is_available) and is_available()):
            continue
        for item in dir(module):
            try:
                element = getattr(module, item)
            except TypeError:
                # this rarely happens: e.g. for 'item' being 'None'
                continue
            try:
                # the class must inherit CommonDatabase (without being the same)
                if issubclass(element, CommonIndexer.CommonDatabase) \
                        and element is not CommonIndexer.CommonDatabase:
                    # TODO: debug - "[Indexer]: indexing engine found in '%s': %s" % (mod_path, element)
                    # the interface is ok
                    result.append(element)
            except TypeError:
                # 'element' is not a class
                continue
    return result
94
def _sort_indexers_by_preference(indexer_classes, pref_order):
    """reorder a list of indexer classes so that preferred ones come first

    Every entry of the preference list is compared with the last dotted
    component of the module name of each indexer class (e.g.: XapianIndexer
    or PyLuceneIndexer). Matching classes are placed in the order of the
    preference list; all other classes follow in their original order.
    The given list of classes is not modified.

    @param indexer_classes: the list of all available indexer classes
    @type indexer_classes: list of CommonIndexer.CommonDatabase objects
    @param pref_order: names of the preferred indexer modules (best first)
    @type pref_order: list of str
    @return: sorted list of indexer classes
    @rtype: list of CommonIndexer.CommonDatabase objects
    """
    def module_basename(indexer_class):
        # last dotted component of the name of the class' module
        module_path = os.path.basename(indexer_class.__module__)
        return module_path.split(".")[-1]

    # work on a copy - the caller's list stays untouched
    remaining = indexer_classes[:]
    ordered = []
    for wanted in pref_order:
        # pick every remaining class that lives in the wanted module ...
        chosen = [candidate for candidate in remaining
                  if module_basename(candidate) == wanted]
        # ... and move these classes to the end of the ordered part
        ordered.extend(chosen)
        for candidate in chosen:
            remaining.remove(candidate)
    # classes without a preference keep their relative order at the end
    return ordered + remaining
# look for usable indexing engines a single time - during the first import
# of this package
_AVAILABLE_INDEXERS = _get_available_indexers()

# flag for callers that need to know if indexing support is available:
# True if at least one indexing engine was found
HAVE_INDEXER = len(_AVAILABLE_INDEXERS) > 0
def get_indexer(basedir, preference=None):
    """open or create an indexing database below the given directory

    If the directory already exists, every available indexing engine is
    asked (in the order of preference) to open it without creating anything.
    The first engine that succeeds is used. If none of them can handle the
    directory, it is removed and a new database is created with the most
    preferred engine. The same engine is used if the directory does not
    exist yet.

    @raise IndexError: there is no indexing engine available
    @raise ValueError: may be raised by the indexing engine that creates the
        new database
    @raise OSError: any error that could occur while creating the database

    @param basedir: the parent directory of (possible) different indexing
        databases
    @type basedir: string
    @param preference: names of the preferred indexer modules (best first);
        None means "no preference"
    @type preference: list of str
    @return: an indexer object of the most appropriate indexing engine
    @rtype: instance of a subclass of L{CommonIndexer.CommonDatabase}
    """
    if not _AVAILABLE_INDEXERS:
        raise IndexError("Indexer: no indexing engines are available")
    # rank the available indexing engines - the preferred ones come first
    wanted_order = [] if preference is None else preference
    ranked_engines = _sort_indexers_by_preference(_AVAILABLE_INDEXERS,
                                                  wanted_order)
    default_engine = ranked_engines[0]
    if not os.path.exists(basedir):
        # nothing to reuse: create a database with the best engine
        return default_engine(basedir)
    for engine in ranked_engines:
        # Creating a database is not allowed here, since the directory could
        # belong to one of the other engines. The first engine that manages
        # to open the directory is used.
        try:
            return engine(basedir, create_allowed=False)
        except (ValueError, OSError):
            # invalid type of database or some other error
            continue
    # none of the engines could open the existing directory: it is removed,
    # before a new database is created with the best engine
    shutil.rmtree(basedir, ignore_errors=True)
    logging.info("Deleting invalid indexing directory '%s'", basedir)
    return default_engine(basedir)
if __name__ == "__main__":
    # show all supported indexing engines (with fulfilled requirements)
    for ONE_INDEX in _AVAILABLE_INDEXERS:
        # the call syntax with a single argument prints the same text as the
        # former print statement under Python 2 and is valid for Python 3, too
        print(ONE_INDEX)