Changeset 466
- Timestamp:
- 11/26/07 08:09:26 (11 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/branches/simplification/pyndexter/indexers/_hyperestraier.py
r465 r466
 78  78
 79  79       def index(self, document):
 80     -         uri = unicode(document.uri)
     80 +         key = unicode(document.key)
 81  81           hdoc = hyperestraier.Document()
 82  82           for k, v in document.iteritems():
 83  83               hdoc.add_attr(u'@' + k, v)
 84     -         hdoc.add_attr(u'@uri', uri)
     84 +         hdoc.add_attr(u'@uri', key)
 85  85           for line in document.texts:
 86  86               hdoc.add_text(line)
 87  87           if not self.db.put_doc(hdoc):
 88     -             raise errors.IndexerError('Failed to index %s' % document.uri)
     88 +             raise errors.IndexerError('Failed to index %s' % document.key)
 89  89
 90     -     def discard(self, uri):
 91     -         uri = unicode(uri)
 92     -         if not self.db.out_doc_by_uri(uri):
 93     -             raise errors.DocumentNotFound(uri)
     90 +     def discard(self, key):
     91 +         key = unicode(key)
     92 +         try:
     93 +             self.db.out_doc_by_uri(key)
     94 +         except hyperestraier.HyperestraierError, e:
     95 +             raise errors.DocumentNotFound(key)
 94  96
 95     -     def fetch(self, uri):
 96     -         uri = unicode(uri)
 97     -         doc = self.db.get_doc_by_uri(uri)
     97 +     def fetch(self, key):
     98 +         key = unicode(key)
     99 +         doc = self.db.get_doc_by_uri(key)
 98 100           if not doc:
 99     -             raise errors.DocumentNotFound(uri)
    101 +             raise errors.DocumentNotFound(key)
100 102           attributes = self._translate_attributes(doc)
101     -         return Document(uri, texts=doc.dtexts, quality=0.99,
    103 +         return Document(key, texts=doc.dtexts, quality=0.99,
102 104                           attributes=attributes)
103 105
  …   …
152 154       # Internal methods
153 155       def _translate(self, doc):
154     -         return Hit(uri=doc.uri, document=self.indexer.fetch,
    156 +         return Hit(key=doc.uri, document=self.indexer.fetch,
155 157                      attributes=self.indexer._translate_attributes(doc))
pyndexter/branches/simplification/pyndexter/__init__.py
r465 r466
 59  59       Defines three useful attributes:
 60  60
 61     -     ``uri``
 62     -         The unique URI for this document.
     61 +     ``key``
     62 +         The unique key for this document.
 63  63
 64  64       ``quality``
  …   …
 66  66       """
 67  67
 68     -     def __init__(self, uri, quality=1.0, attributes=None, text=None,
     68 +     def __init__(self, key, quality=1.0, attributes=None, text=None,
 69  69                    texts=None):
 70     -         self.uri = URI(uri)
     70 +         self.key = key
 71  71           self.quality = quality
 72  72           self._attributes = {}
  …   …
121 121
122 122       def __hash__(self):
123     -         return hash(self.uri)
    123 +         return hash(self.key)
124 124
125 125
  …   …
142 142           raise NotImplementedError
143 143
144     -     def discard(self, uri):
    144 +     def discard(self, key):
145 145           """ Discard a document. """
146 146           raise NotImplementedError
  …   …
156 156           raise NotImplementedError
157 157
158     -     def fetch(self, uri):
    158 +     def fetch(self, key):
159 159           """Attempt to fetch indexer representation of the document.
160 160
  …   …
162 162           and 1.0, representing the quality of the document in comparison to the
163 163           original."""
164     -         raise DocumentNotFound(uri)
    164 +         raise DocumentNotFound(key)
165 165
166 166       def replace(self, document):
167 167           """Replace a document in the index. Default is to `discard()` and
168 168           `index()`."""
169     -         self.discard(document.uri)
    169 +         self.discard(document.key)
170 170           self.index(document)
171 171
  …   …
227 227           A Document object corresponding to the Hit.
228 228
229     -     ``uri``
230     -         The URI of the hit.
    229 +     ``key``
    230 +         The key of the hit.
231 231       """
232 232
233     -     def __init__(self, uri, document=None, score=0.0, attributes=None):
234     -         self.uri = uri
    233 +     def __init__(self, key, document=None, score=0.0, attributes=None):
    234 +         self.key = key
235 235           if callable(document):
236     -             self.get_document = lambda: document(uri)
    236 +             self.get_document = lambda: document(key)
237 237           else:
238 238               self._document = document
  …   …
268 268
269 269       def __hash__(self):
270     -         return hash(self.uri)
    270 +         return hash(self.key)
271 271
272 272
  …   …
289 289           return iter(self.indexer)
290 290
291     -     def discard(self, uri):
292     -         if isinstance(uri, Document):
293     -             uri = uri.uri
294     -         return self.indexer.discard(uri)
    291 +     def discard(self, key):
    292 +         if isinstance(key, Document):
    293 +             key = key.key
    294 +         return self.indexer.discard(key)
295 295
296 296       def search(self, query):
  …   …
299 299           return self.indexer.search(query)
300 300
301     -     def fetch(self, uri):
302     -         return self.indexer.fetch(URI(uri))
    301 +     def fetch(self, key):
    302 +         return self.indexer.fetch(key)
303 303
304 304
pyndexter/branches/simplification/setup.py
r465 r466
 28  28       ],
 29  29       extras_require={
 30     -         'hyperestraier': ['hyperestraier>=0.9.3'],
     30 +         'hyperestraier': ['hyperestraier>=0.10.4'],
 31  31           'snowball': ['PyStemmer>=1.0.1'],
 32  32           # TODO put more indexer requirements in here
