Changeset 363
- Timestamp: 01/15/07 07:03:12 (2 years ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/branches/refactoring/pyndexter/indexers/xapian.py
r361 r363
  12   12   xapian = __import__('xapian')
  13   13
       14 + def create_indexer(framework, path=None, stemmer='english', words=r'\w+',
       15 +                    **ignore):
       16 +     return XapianIndexer(framework, path=path, stemmer=stemmer, words=words)
  14   17
  15   18   class XapianIndexer(Indexer):
  16      -     capabilities = CAP_ORDERING | CAP_READONLY | CAP_ATTRIBUTES | \
  17      -                    CAP_RELEVANCE | CAP_HITCOUNT | CAP_LIST | CAP_WHOLEWORD | \
  18      -                    CAP_INTERSECTION
  19      -
  20      -     def __init__(self, stemmer='english', words=r'\w+'):
  21      -         Indexer.__init__(self)
  22      -         self.stemmer = xapian.Stem('english')
       19 +     def __init__(self, framework, path, stemmer='english', words=r'\w+'):
       20 +         Indexer.__init__(self, framework)
       21 +         self.stemmer = xapian.Stem(stemmer)
  23   22           self.words = re.compile(words)
  24   23
  25      -     def bind(self, framework):
  26      -         Indexer.bind(self, framework)
  27      -         self.path = os.path.join(framework.path, 'xapian.db')
       24 +         self.path = path
       25 +         self.xapian_path = os.path.join(path, 'xapian.db')
       26 +         self.state_path = os.path.join(path, 'state.db')
       27 +
  28   28           if self.framework.mode == READWRITE:
  29      -             if not os.path.exists(self.path):
  30      -                 os.makedirs(self.path)
  31      -             self.db = xapian.flint_open(self.path, xapian.DB_CREATE_OR_OPEN)
       29 +             if not os.path.exists(self.xapian_path):
       30 +                 os.makedirs(self.xapian_path)
       31 +             self.db = xapian.flint_open(self.xapian_path, xapian.DB_CREATE_OR_OPEN)
  32   32           else:
  33      -             self.db = xapian.flint_open(self.path)
       33 +             self.db = xapian.flint_open(self.xapian_path)
  34   34
  35   35       def index(self, document):
   …    …
  50   50           self.db.replace_document('Q' + uri, doc)
  51   51
       52 +     def update(self, document):
       53 +         return self.index(document)
       54 +
  52   55       def discard(self, uri):
  53   56           self.db.delete_document('Q' + uri.encode('utf-8'))
   …    …
  58   61       def close(self):
  59   62           self.sync()
  60      -         self.db.close()
       63 +         #self.db.close()
  61   64           self.db = None
  62   65
  63      -     def search(self, phrase, flags=0, order_by=None, order_ascending=True,
  64      -                order_type=str):
  65      -         terms = [self.stemmer.stem_word(term.lower())
  66      -                  for term in self.words.findall(phrase.encode('utf-8'))]
       66 +     def search(self, query):
       67 +         query_parser = xapian.QueryParser()
       68 +         query_parser.set_stemmer(self.stemmer)
       69 +         query = query_parser.parse_query(self._compile_query(query))
  67   70           enquire = xapian.Enquire(self.db)
  68      -         query = xapian.Query(xapian.Query.OP_AND, terms)
  69   71           enquire.set_query(query)
  70      -         return XapianSearch(self, phrase, enquire)
       72 +         return XapianSearch(self, query, enquire)
       73 +
       74 +     def state_store(self):
       75 +         return StateStore(self.state_path)
       76 +
       77 +     def _compile_query(self, node):
       78 +         if not node or node.type == node.NULL:
       79 +             return ''
       80 +         if node.type == node.AND:
       81 +             return '%s AND %s' % (self._compile_query(node.left),
       82 +                                   self._compile_query(node.right))
       83 +         elif node.type == node.OR:
       84 +             return '%s OR %s' % (self._compile_query(node.left),
       85 +                                  self._compile_query(node.right))
       86 +         elif node.type == node.NOT:
       87 +             return 'NOT %s' % self._compile_query(node.left)
       88 +         elif node.type == node.TERM:
       89 +             return node.value
       90 +         else:
       91 +             raise NotImplementedError
  71   92
  72   93
   …    …
  74   95       def __iter__(self):
  75   96           matches = self.context.get_mset(0, 10)
  76      -         print matches.get_matches_estimated()
  77   97           for hit in matches:
  78   98               doc = hit[xapian.MSET_DOCUMENT]
  79      -             uri = None
  80      -             # TODO Use skip_to('Q') when implemented (see #26 for more info)
  81      -             for term in doc.termlist():
  82      -                 if term[0][0] == 'Q':
  83      -                     uri = term[0][1:]
  84      -                     break
       99 +             terms = doc.termlist()
      100 +             terms.skip_to('Q')
      101 +             uri = terms.next()[0][1:]
  85  102               assert uri, 'uniQue term (URI) not found in document term list'
  86      -             yield Hit(uri, document=self.indexer.fetch,
      103 +             yield Hit(uri, document=self.indexer.framework.fetch,
  87  104                       did=hit[xapian.MSET_DID],
  88  105                       score=float(hit[xapian.MSET_PERCENT]) / 100.0)
   …    …
  90  107       def __len__(self):
  91  108           return len(self.context)
  92      -
  93      -     # def __getitem__(self, index):
  94      -     #     doc = self.indexer.idx.get_document(self.context[index]['uid'])
  95      -     #     return Hit(doc.get_value(self.indexer.idx.indexValueMap['uri']),
  96      -     #                document=self.indexer.fetch)
