Changeset 465
- Timestamp: 11/26/2007 07:40:47 AM (8 months ago)
- Files:
- pyndexter/branches/simplification/pyndexter/indexers/_hyperestraier.py (modified) (4 diffs)
- pyndexter/branches/simplification/pyndexter/__init__.py (modified) (9 diffs)
- pyndexter/branches/simplification/pyndexter/query.py (modified) (1 diff)
- pyndexter/branches/simplification/pyndexter/util.py (modified) (6 diffs)
- pyndexter/branches/simplification/setup.py (modified) (2 diffs)
- pyndexter/branches/simplification/.todo (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/branches/simplification/pyndexter/indexers/_hyperestraier.py
r458 r465 48 48 import hyperestraier 49 49 from pyndexter import * 50 from pyndexter import errors 50 51 from pyndexter.util import URI 51 52 … … 59 60 uri = URI(uri) 60 61 uri.scheme = 'http' 62 uri.path = 'node/' + uri.path 61 63 return HyperestraierIndexer(uri) 62 64 … … 68 70 69 71 uri = URI(uri, port=1978) 72 scrubbed = URI(uri) 73 scrubbed.username = None 74 scrubbed.password = None 70 75 self.db = hyperestraier.Node() 71 self.db.set_url(str(uri)) 76 self.db.set_url(str(scrubbed)) 77 self.db.set_auth(uri.username, uri.password) 72 78 73 79 def index(self, document): 80 uri = unicode(document.uri) 74 81 hdoc = hyperestraier.Document() 75 for k, v in document. attributes.iteritems():82 for k, v in document.iteritems(): 76 83 hdoc.add_attr(u'@' + k, v) 77 for line in document.content.splitlines(): 84 hdoc.add_attr(u'@uri', uri) 85 for line in document.texts: 78 86 hdoc.add_text(line) 79 self.db.put_doc(hdoc, 1) 87 if not self.db.put_doc(hdoc): 88 raise errors.IndexerError('Failed to index %s' % document.uri) 80 89 81 90 def discard(self, uri): 82 91 uri = unicode(uri) 83 92 if not self.db.out_doc_by_uri(uri): 84 raise DocumentNotFound(uri)93 raise errors.DocumentNotFound(uri) 85 94 86 95 def fetch(self, uri): 87 96 uri = unicode(uri) 88 97 doc = self.db.get_doc_by_uri(uri) 98 if not doc: 99 raise errors.DocumentNotFound(uri) 89 100 attributes = self._translate_attributes(doc) 90 101 return Document(uri, texts=doc.dtexts, quality=0.99, … … 98 109 self.db.optimize() 99 110 100 def flush(self):101 self.db.sync()102 103 111 def close(self): 104 self. db.close()112 self.flush() 105 113 self.db = None 106 114 pyndexter/branches/simplification/pyndexter/__init__.py
r459 r465 18 18 from StringIO import StringIO 19 19 from UserDict import DictMixin 20 from pyndexter.errors import *21 20 from pyndexter.util import URI 22 21 from pyndexter.query import Query … … 34 33 35 34 __all__ = ['Document', 'Query', 'Hit', 'Indexer', 'ResultSet', 'Stemmer', 36 'Error', 'DocumentNotFound', 'InvalidURI', 'InvalidIndexer', 37 'connect'] 35 'connect', 'URI'] 38 36 39 37 … … 46 44 47 45 48 class Error(Exception):49 """Base of all pyndexter exceptions."""50 51 class DocumentNotFound(Error):52 """Raised when a document could not be found, usually by the fetch()53 methods."""54 55 class PluginError(Error):56 """An error occurred in the plugin system."""57 58 59 46 class Stemmer(object): 60 47 """Abstraction for a stemming algorithm.""" … … 81 68 def __init__(self, uri, quality=1.0, attributes=None, text=None, 82 69 texts=None): 83 self.uri = uri70 self.uri = URI(uri) 84 71 self.quality = quality 85 72 self._attributes = {} … … 206 193 def __len__(self): 207 194 """ Return the length of the result set. """ 208 raise NotImplementedError 195 count = 0 196 for i in self: 197 count += 1 198 return count 209 199 210 200 def __getitem__(self, index): 211 201 """Return a Hit object for a specific index in the search result. 
212 202 Not necessarily implemented by all Indexers.""" 213 raise NotImplementedError 203 for i, value in enumerate(self): 204 if i == index: 205 return value 206 raise IndexError(index) 214 207 215 208 def __getslice__(self, i, j): … … 279 272 280 273 class IndexerWrapper(Indexer): 281 """An Indexer wrapper that does type translation for end users.""" 274 """An Indexer wrapper that does some convenient type conversion for end 275 users.""" 282 276 def __init__(self, indexer): 283 277 Indexer.__init__(self) … … 296 290 297 291 def discard(self, uri): 298 if isinstance(uri, document):299 uri = document.uri300 return self.indexer.discard( URI(uri))292 if isinstance(uri, Document): 293 uri = uri.uri 294 return self.indexer.discard(uri) 301 295 302 296 def search(self, query): … … 309 303 310 304 311 def load_plugin(name,entry_point, plugin_paths=None):305 def iter_plugins(entry_point, plugin_paths=None): 312 306 import pkg_resources 313 307 … … 340 334 341 335 for entry in pkg_resources.working_set.iter_entry_points(entry_point): 342 if entry.name == name: 343 try: 344 _debug('Loading %s' % entry) 345 factory = entry.load(require=True) 346 return factory 347 except (ImportError, pkg_resources.DistributionNotFound, 348 pkg_resources.VersionConflict, pkg_resources.UnknownExtra), e: 349 _debug('Failed to load %s: %s' % (name, _format_error(entry, e))) 350 raise PluginError('%s (%s)' % (name, _format_error(entry, e))) 351 raise PluginError('No suitable plugins found for %s' % name) 336 try: 337 _debug('Loading %s' % entry) 338 plugin = entry.load(require=True) 339 yield entry.name, plugin 340 except (ImportError, pkg_resources.DistributionNotFound, 341 pkg_resources.VersionConflict, pkg_resources.UnknownExtra), e: 342 _debug('Failed to load %s: %s' % (entry.name, _format_error(entry, e))) 343 yield None, PluginError('%s (%s)' % (entry.name, _format_error(entry, e))) 344 345 346 def load_plugin(plugin_name, entry_point, plugin_paths=None): 347 for name, plugin in 
iter_plugins(entry_point, plugin_paths): 348 if name is None: 349 raise plugin 350 if name == plugin_name: 351 return plugin 352 raise PluginError('No suitable plugins found named "%s"' % name) 352 353 353 354 pyndexter/branches/simplification/pyndexter/query.py
r458 r465 9 9 10 10 import re 11 from pyndexter import Error11 from pyndexter.errors import InvalidQuery 12 12 13 13 14 14 __all__ = ['Query'] 15 16 17 class InvalidQuery(Error):18 """Invalid query string."""19 15 20 16 pyndexter/branches/simplification/pyndexter/util.py
r457 r465 25 25 """.split() 26 26 27 27 28 class URI(object): 28 29 """Parse a URI into its component parts. The `query` component is passed … … 40 41 The URI constructor can be passed a string: 41 42 42 >>> u = URI('http://user:password@www.example.com /some/path?parm=1&parm=2&other=3#fragment')43 >>> u = URI('http://user:password@www.example.com:12345/some/path?parm=1&parm=2&other=3#fragment') 43 44 >>> u 44 URI(u'http://user:password@www.example.com /some/path?other=3&parm=1&parm=2#fragment')45 URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') 45 46 >>> u.scheme 46 47 'http' … … 60 61 ...or the individual URI components as keyword arguments: 61 62 62 >>> URI(scheme='http', username='user', password='password', host='www.example.com', p ath='/some/path', query={'parm': [1, 2], 'other': [3]}, fragment='fragment')63 URI(u'http://user:password@www.example.com /some/path?other=3&parm=1&parm=2#fragment')63 >>> URI(scheme='http', username='user', password='password', host='www.example.com', port=12345, path='/some/path', query={'parm': [1, 2], 'other': [3]}, fragment='fragment') 64 URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') 64 65 65 66 ...or finally, another URI object: … … 71 72 False 72 73 >>> v 73 URI(u'http://user:password@www.example.com /some/path?other=3&parm=1&parm=2#fragment')74 URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') 74 75 75 76 URI also normalises the path component: … … 79 80 """ 80 81 81 _pattern = re.compile(r'(?:(?P<scheme>[^:]+)://)?(?:(?P<username>[^:@]*)(?::(?P<password>[^@]*))?@)?(?P<host>[^?/#:]*)(?::(P<port>[\d+]+))?(?P<path>/[^#?]*)?(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?') 82 83 __slots__ = ('scheme', 'username', 'password', 'host', 'port', '_path', 84 'query', 'fragment') 82 _pattern = re.compile(r""" 83 (?:(?P<scheme>[^:]+)://)? 84 (?:(?P<username>[^:@]*) 85 (?::(?P<password>[^@]*))?@)? 
86 (?P<host>[^?/#:]*) 87 (?::(?P<port>[^/]+))? 88 (?P<path>/[^#?]*)? 89 (?:\?(?P<query>[^#]*))? 90 (?:\#(?P<fragment>.*))? 91 """, re.VERBOSE) 92 93 __slots__ = ['scheme', 'username', 'password', 'host', 'port', '_path', 94 'query', 'fragment'] 85 95 86 96 def __init__(self, uri=None, scheme=None, username=None, password=None, … … 155 165 156 166 def __str__(self): 167 return unicode(self).encode('utf-8') 168 169 def __unicode__(self): 157 170 uri = unicode(self.scheme and (quote(self.scheme) + u'://') or u'') 158 171 if self.username or self.password: pyndexter/branches/simplification/setup.py
r459 r465 4 4 name='pyndexter', 5 5 description="An abstraction layer for full-text indexing engines.", 6 long_description="""Pyndexter (pronounced 'poindexter') is an abstraction7 layer for full-text indexing and search engines. It presents a uniform8 query syntax to the user, includes a basic but functional pure-Python9 indexer, and has adapters for Hype, Hyperestraier, Lucene, Lupy,10 Pyndex, Swish-e andXapian.""",6 long_description="""Pyndexter is an abstraction layer for full-text 7 indexing and search engines. It presents a uniform query syntax to the 8 user, includes a basic but functional pure-Python indexer, and has 9 adapters for Hype, Hyperestraier, Lucene, Lupy, Pyndex, Swish-e and 10 Xapian.""", 11 11 url='http://swapoff.org/pyndexter', 12 12 download_url='http://swapoff.org/pyndexter', … … 16 16 author_email='alec@swapoff.org', 17 17 version='0.4', 18 #test_suite='pyndexter.test.suite',18 test_suite='pyndexter.tests.suite', 19 19 classifiers=['Development Status :: 3 - Alpha', 20 20 'Environment :: Plugins',
