1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 """
24 interface for the pylucene (v1.x) indexing engine
25
26 take a look at PyLuceneIndexer.py for PyLucene v2.x support
27 """
28
29 __revision__ = "$Id: PyLuceneIndexer1.py 8505 2008-09-27 09:08:34Z dwaynebailey $"
30
31
32 import PyLuceneIndexer
33 import PyLucene
34
35
38
39
41 """manage and use a pylucene indexing database"""
42
45 """generate a query for a plain term of a string query
46
47 basically this function parses the string and returns the resulting
48 query
49
50 @param text: the query string
51 @type text: str
52 @param require_all: boolean operator
53 (True -> AND (default) / False -> OR)
54 @type require_all: bool
55 @param analyzer: the analyzer to be used
56 possible analyzers are:
57 - L{CommonDatabase.ANALYZER_TOKENIZE}
58 the field value is split to be matched word-wise
59 - L{CommonDatabase.ANALYZER_PARTIAL}
60 the field value must start with the query string
61 - L{CommonDatabase.ANALYZER_EXACT}
62 keep special characters and the like
63 @type analyzer: int
64 @return: resulting query object
65 @rtype: PyLucene.Query
66 """
67 if analyzer is None:
68 analyzer = self.analyzer
69 if analyzer == self.ANALYZER_EXACT:
70
71
72 pass
73
74 if analyzer == self.ANALYZER_EXACT:
75 analyzer_obj = self.ExactAnalyzer()
76 else:
77 text = _escape_term_value(text)
78 analyzer_obj = PyLucene.StandardAnalyzer()
79 qp = PyLucene.QueryParser(analyzer=analyzer_obj)
80 if require_all:
81 qp.setDefaultOperator(qp.Operator.AND)
82 else:
83 qp.setDefaultOperator(qp.Operator.OR)
84 if (analyzer & self.ANALYZER_PARTIAL) > 0:
85
86 text += "*"
87 return qp.parse(text)
88
90 """generate a field query
91
92 this function creates a field->value query
93
94 @param field: the fieldname to be used
95 @type field: str
96 @param value: the wanted value of the field
97 @type value: str
98 @param analyzer: the analyzer to be used
99 possible analyzers are:
100 - L{CommonDatabase.ANALYZER_TOKENIZE}
101 the field value is split to be matched word-wise
102 - L{CommonDatabase.ANALYZER_PARTIAL}
103 the field value must start with the query string
104 - L{CommonDatabase.ANALYZER_EXACT}
105 keep special characters and the like
106 @type analyzer: int
107 @return: resulting query object
108 @rtype: PyLucene.Query
109 """
110 if analyzer is None:
111 analyzer = self.analyzer
112 if analyzer == self.ANALYZER_EXACT:
113 analyzer_obj = self.ExactAnalyzer()
114 else:
115 value = _escape_term_value(value)
116 analyzer_obj = PyLucene.StandardAnalyzer()
117 if (analyzer & self.ANALYZER_PARTIAL) > 0:
118
119 value += "*"
120 return PyLucene.QueryParser.parse(value, field, analyzer_obj)
121
123 """generate a combined query
124
125 @param queries: list of the original queries
126 @type queries: list of PyLucene.Query
127 @param require_all: boolean operator
128 (True -> AND (default) / False -> OR)
129 @type require_all: bool
130 @return: the resulting combined query object
131 @rtype: PyLucene.Query
132 """
133 combined_query = PyLucene.BooleanQuery()
134 for query in queries:
135 combined_query.add(
136 PyLucene.BooleanClause(query, require_all, False))
137 return combined_query
138
140 """add a term to a document
141
142 @param document: the document to be changed
143 @type document: xapian.Document | PyLucene.Document
144 @param term: a single term to be added
145 @type term: str
146 @param tokenize: should the term be tokenized automatically
147 @type tokenize: bool
148 """
149
150 document.add(PyLucene.Field(str(PyLuceneIndex.UNNAMED_FIELD_NAME), term,
151 True, True, tokenize))
152
154 """add a field term to a document
155
156 @param document: the document to be changed
157 @type document: xapian.Document | PyLucene.Document
158 @param field: name of the field
159 @type field: str
160 @param term: term to be associated to the field
161 @type term: str
162 @param tokenize: should the term be tokenized automatically
163 @type tokenize: bool
164 """
165
166
167 document.add(PyLucene.Field(str(field), term,
168 True, True, tokenize))
169
171 """return an object containing the results of a query
172
173 @param query: a pre-compiled query
174 @type query: a query object of the real implementation
175 @return: an object that allows access to the results
176 @rtype: subclass of CommonEnquire
177 """
178 return PyLucene.indexSearcher.search(query)
179
def search(self, query, fieldnames):
    """collect the requested field contents of every match of a query

    A single field name given as a string is treated like a list with
    one item. Fields for which a matching document returns None are left
    out of that document's dict, so a dict may be empty.

    @param query: the query to be issued
    @type query: a query object of the real implementation
    @param fieldnames: the name(s) of a field of the document content
    @type fieldnames: string | list of strings
    @return: one dict (fieldname -> content) per match
    @rtype: list of dicts
    """
    if isinstance(fieldnames, basestring):
        wanted = [fieldnames]
    else:
        wanted = fieldnames
    # NOTE(review): the searcher is looked up on the PyLucene module, not
    # on this instance - confirm that "PyLucene.indexSearcher" is intended
    matches = []
    # every item of the search result is unpacked as a (hit, document) pair
    for _hit, document in PyLucene.indexSearcher.search(query):
        pairs = ((name, document.get(name)) for name in wanted)
        matches.append(dict((name, value) for name, value in pairs
                            if value is not None))
    return matches
203
210