Changeset 366
- Timestamp:
- 01/16/07 18:39:45 (2 years ago)
- Files:
-
- pyndexter/branches/refactoring/pyndexter/indexers/default.py (modified) (2 diffs)
- pyndexter/branches/refactoring/pyndexter/indexers/hype.py (modified) (4 diffs)
- pyndexter/branches/refactoring/pyndexter/indexers/hyperestraier.py (modified) (5 diffs)
- pyndexter/branches/refactoring/pyndexter/indexers/lupy.py (modified) (1 diff)
- pyndexter/branches/refactoring/pyndexter/indexers/xapian.py (modified) (2 diffs)
- pyndexter/branches/refactoring/pyndexter/__init__.py (modified) (2 diffs)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/branches/refactoring/pyndexter/indexers/default.py
r364 r366
  188 188  else:
  189 189  all_uris.intersection_update(word_uris)
- 190      return DefaultSearch(self, phrase, all_uris)
+     190  return DefaultResult(self, all_uris)
  191 191  search = synchronised(search)
  192 192
… …
  238 238
  239 239
- 240      class DefaultSearch(Search):
+     240  class DefaultResult(Result):
  241 241  def __iter__(self):
  242 242  for uri in self.context:
pyndexter/branches/refactoring/pyndexter/indexers/hype.py
r365 r366
   57  57  # if order is not None:
   58  58  # search = search.order(order)
-  59      return HypeResult(self, query,search, self.enable_scoring)
+      59  return HypeResult(self, search, self.enable_scoring)
   60  60
   61  61  def optimise(self):
… …
   72  72
   73  73  # Internal methods
-  74      def _translate_attributes(self, hdoc):
-  75      attributes = {}
-  76      for k in hdoc.attributes:
-  77      if k[0] == '@':
-  78      attributes[k[1:]] = hdoc.get(k)
-  79      else:
-  80      attributes[k] = hdoc.get(k)
-  81      return attributes
-  82
   83  74  def _compile_query(self, node):
   84  75  if not node or node.type == node.NULL:
… …
  103  94
  104  95  class HypeResult(Result):
- 105      def __init__(self, indexer, phrase, context, enable_scoring=True):
+      96  def __init__(self, indexer, context, enable_scoring=True):
+      97  self.enable_scoring = enable_scoring
  106  98  if enable_scoring:
  107  99  context = context.scores().option(hype.ESTCONDSCFB)
- 108      Result.__init__(self, indexer, phrase,context)
+     100  Result.__init__(self, indexer, context)
  109 101
  110 102  def __iter__(self):
- 111      for doc, score in self.context:
- 112      yield Hit(document=self.indexer.framework.fetch,
- 113      score=score,
- 114      **self.indexer._translate_attributes(doc))
+     103  if self.enable_scoring:
+     104  for doc, score in self.context:
+     105  yield self._translate(doc, score)
+     106  else:
+     107  for doc in self.context:
+     108  yield self._translate(doc)
  115 109
  116 110  def __len__(self):
… …
  118 112
  119 113  def __getitem__(self, index):
- 120      return self.context[index][0]['@uri']
+     114  doc = self.context[index][0]
+     115  if self.enable_scoring:
+     116  score = self.context.get_score(index)
+     117  else:
+     118  score = None
+     119  return self._translate(doc, score)
  121 120
+     121  # Internal methods
+     122  def _translate(self, doc, score=None):
+     123  attrs = self._translate_attributes(doc)
+     124  if self.enable_scoring:
+     125  if score is None:
+     126  score = self.context.get_score(index)
+     127  attrs['score'] = score
+     128  return Hit(document=self.indexer.framework.fetch, **attrs)
+     129
+     130  def _translate_attributes(self, hdoc):
+     131  attributes = {}
+     132  for k in hdoc.attributes:
+     133  if k[0] == '@':
+     134  attributes[k[1:]] = hdoc.get(k)
+     135  else:
+     136  attributes[k] = hdoc.get(k)
+     137  return attributes
+     138
pyndexter/branches/refactoring/pyndexter/indexers/hyperestraier.py
r365 r366
   42  42  self.db.open(self.path, self.hype_mode)
   43  43
-  44      def fetch(self, uri):
-  45      uri = uri.encode('utf-8')
-  46      id = self.db.uri_to_id(uri)
-  47      if id == -1:
-  48      raise DocumentNotFound(uri)
-  49      doc = self.db.get_doc(id, 0)
-  50      attributes = self._translate_attributes(doc)
-  51      return Document(content=''.join(doc.texts()), source=self.framework.source, **attributes)
-  52
   53  44  def index(self, document):
   54  45  hdoc = HyperEstraier.Document()
… …
   72  63  phrase = self._compile_query(query)
   73  64  return self.hype_search(phrase, simple=False)
-  74      # def search(self, phrase, flags=0, order_by=None,
-  75      # order_ascending=True, order_type=str):
-  76      # phrase = ((not flags & SEARCH_UNION) and ' ' or '|').join(phrase.split())
-  77      # order = None
-  78      # if order_by is not None:
-  79      # if order_type is int:
-  80      # order_type = 'NUM'
-  81      # else:
-  82      # order_type = 'STR'
-  83      # order = u'@%s %s%s' % (order_by, order_type,
-  84      # order_ascending and 'A' or 'D')
-  85      # if not flags & SEARCH_ASTERISK:
-  86      # phrase = phrase.replace('*', '\\*')
-  87      # if not flags & SEARCH_QUESTION:
-  88      # phrase = phrase.replace('?', '\\?')
-  89      # if not flags & SEARCH_WHOLEWORD:
-  90      # phrase = '*' + '* *'.join(phrase.split()) + '*'
-  91      # return self.hype_search(phrase, order=order)
   92  65
   93  66  def optimise(self):
… …
  109  82  # if order is not None:
  110  83  # search = search.order(order)
- 111      return HyperestraierResult(self, phrase,search)
+      84  return HyperestraierResult(self, search)
  112  85
  113  86  # Internal methods
… …
  128 101  raise NotImplementedError
  129 102
+     103
+     104  indexer_factory = ComponentFactory(HyperestraierIndexer, hype_mode=int)
+     105
+     106
+     107  class HyperestraierResult(Result):
+     108  def __iter__(self):
+     109  for id in self.context:
+     110  yield self._translate(id)
+     111
+     112  def __len__(self):
+     113  return len(self.context)
+     114
+     115  def __getitem__(self, index):
+     116  return self._translate(self.context[index])
+     117
+     118  # Internal methods
+     119  def _translate(self, id):
+     120  doc = self.indexer.db.get_doc(id, 0)
+     121  return Hit(document=self.indexer.framework.fetch,
+     122  **self._translate_attributes(doc))
+     123
  130 124  def _translate_attributes(self, hdoc):
  131 125  attributes = {}
… …
  137 131  return attributes
  138 132
- 139
- 140      indexer_factory = ComponentFactory(HyperestraierIndexer, hype_mode=int)
- 141
- 142
- 143      class HyperestraierResult(Result):
- 144      def __iter__(self):
- 145      for id in self.context:
- 146      doc = self.indexer.db.get_doc(id, 0)
- 147      yield Hit(document=self.indexer.framework.fetch,
- 148      **self.indexer._translate_attributes(doc))
- 149
- 150      def __len__(self):
- 151      return len(self.context)
- 152
- 153      def __getitem__(self, index):
- 154      print type(self.context)
- 155      print dir(self.context)
- 156      return self.context[index]['@uri']
pyndexter/branches/refactoring/pyndexter/indexers/lupy.py
r364 r366
   46  46  searcher = lupy.search.indexsearcher.IndexSearcher(self.db_path)
   47  47  hits = searcher.search(lupy_query)
-  48      return LupyResult(self, query,hits)
+      48  return LupyResult(self, hits)
   49  49
   50  50  def optimise(self):
pyndexter/branches/refactoring/pyndexter/indexers/xapian.py
r364 r366
   74  74  enquire = xapian.Enquire(self.db)
   75  75  enquire.set_query(query)
-  76      return XapianResult(self, query,enquire)
+      76  return XapianResult(self, enquire)
   77  77
   78  78  def state_store(self):
… …
  103 103  matches = self.context.get_mset(0, 20)
  104 104  for hit in matches:
- 105      doc = hit[xapian.MSET_DOCUMENT]
- 106      terms = doc.termlist()
- 107      terms.skip_to('Q')
- 108      uri = terms.next()[0][1:]
- 109      assert uri, 'uniQue term (URI) not found in document term list'
- 110      yield Hit(uri, document=self.indexer.framework.fetch,
- 111      did=hit[xapian.MSET_DID],
- 112      score=float(hit[xapian.MSET_PERCENT]) / 100.0)
+     105  yield self._translate(hit)
+     106
+     107  def __getitem__(self, index):
+     108  matches = self.context.get_mset(index, 1)
+     109  for hit in matches:
+     110  return self._translate(hit)
+     111  return matches.next()
+     112
+     113  def __getslice__(self, i, j):
+     114  for hit in self.context.get_mset(i, j - i):
+     115  yield self._translate(hit)
  113 116
  114 117  def __len__(self):
  115 118  return len(self.context)
+     119
+     120  # Internal methods
+     121  def _translate(self, hit):
+     122  doc = hit[xapian.MSET_DOCUMENT]
+     123  terms = doc.termlist()
+     124  terms.skip_to('Q')
+     125  uri = terms.next()[0][1:]
+     126  assert uri, 'uniQue term (URI) not found in document term list'
+     127  return Hit(uri, document=self.indexer.framework.fetch,
+     128  did=hit[xapian.MSET_DID],
+     129  score=float(hit[xapian.MSET_PERCENT]) / 100.0)
+     130
pyndexter/branches/refactoring/pyndexter/__init__.py
r365 r366
  603 603  object. """
  604 604
- 605      def __init__(self, indexer, query,context):
+     605  def __init__(self, indexer, context):
  606 606  self.indexer = indexer
- 607      self.query = query
  608 607  self.context = context
  609 608
… …
  649 648  return key in self.attributes
  650 649
+     650  def __repr__(self):
+     651  return '<Hit %s>' % ' '.join(['%s=%s' % (k, repr(v)) for k, v in
+     652  self.attributes.iteritems()])
+     653
  651 654  def _get_document(self):
  652 655  if callable(self._document):
