Changeset 365
- Timestamp:
- 01/16/07 17:02:28 (2 years ago)
- Files:
-
- pyndexter/branches/refactoring/pyndexter/indexers/hype.py (modified) (4 diffs)
- pyndexter/branches/refactoring/pyndexter/indexers/hyperestraier.py (modified) (2 diffs)
- pyndexter/branches/refactoring/pyndexter/__init__.py (modified) (5 diffs)
- pyndexter/branches/refactoring/pyndexter/util.py (modified) (2 diffs)
- pyndexter/branches/refactoring/.todo (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/branches/refactoring/pyndexter/indexers/hype.py
r364 r365 21 21 22 22 class HypeIndexer(Indexer): 23 def __init__(self, framework, path, hype_mode=0): 23 def __init__(self, framework, path, hype_mode=0, enable_scoring=True): 24 24 Indexer.__init__(self, framework) 25 25 self.path = path 26 26 self.hype_path = os.path.join(self.path, 'hype.db') 27 27 self.state_path = os.path.join(self.path, 'state.db') 28 self.enable_scoring = enable_scoring 28 29 29 30 if framework.mode == READONLY: … … 56 57 # if order is not None: 57 58 # search = search.order(order) 58 return HypeResult(self, query, search) 59 return HypeResult(self, query, search, self.enable_scoring) 59 60 60 61 def optimise(self): … … 97 98 98 99 99 indexer_factory = ComponentFactory(HypeIndexer, hype_mode=int) 100 indexer_factory = ComponentFactory(HypeIndexer, hype_mode=int, 101 enable_scoring=bool) 100 102 101 103 102 104 class HypeResult(Result): 103 def __init__(self, indexer, phrase, context): 104 context = context.scores().option(hype.ESTCONDSCFB) 105 def __init__(self, indexer, phrase, context, enable_scoring=True): 106 if enable_scoring: 107 context = context.scores().option(hype.ESTCONDSCFB) 105 108 Result.__init__(self, indexer, phrase, context) 106 109 … … 115 118 116 119 def __getitem__(self, index): 117 return self.context[index]['@uri'] 120 return self.context[index][0]['@uri'] 118 121 pyndexter/branches/refactoring/pyndexter/indexers/hyperestraier.py
r364 r365 145 145 for id in self.context: 146 146 doc = self.indexer.db.get_doc(id, 0) 147 # How do we get the score? 148 147 yield Hit(document=self.indexer.framework.fetch, 149 148 **self.indexer._translate_attributes(doc)) … … 153 152 154 153 def __getitem__(self, index): 154 print type(self.context) 155 print dir(self.context) 155 156 return self.context[index]['@uri'] pyndexter/branches/refactoring/pyndexter/__init__.py
r364 r365 64 64 class InvalidQuery(Error): 65 65 """ Invalid query string. """ 66 class FrameworkError(Error): 67 """Base of Framework errors.""" 68 class InvalidModule(FrameworkError): 69 """The module provided was not loadable.""" 70 def __init__(self, module, exception=None): 71 message = 'Could not load module "%s"' % module 72 if exception: 73 message += '. Original exception was: %s' % exception 74 FrameworkError.__init__(self, message) 66 75 67 76 … … 489 498 yield uri 490 499 491 def update(self): 492 """ Update the index with the current state of the document source. """ 500 def update(self, filter=None, context=None): 501 """ Update the index with the current state of the document source. 502 503 `filter` is a callable in the form `(context, stream)`, where `stream` 504 is an iterable of `(transition, uri)` pairs.""" 493 505 self._assert_rw() 494 506 if not self.state_store: 495 507 raise IndexerError("Source state storage path not defined, " 496 "Framework is not capable of automatic updates.") 508 "Framework is not capable of automatic " 509 "updates.") 497 510 if self.state_store.exists(): 498 511 store = self.state_store.retrieve() 499 for transition, uri in self.source.difference(store): 512 if not filter: 513 def filter(context, stream): 514 for transition, uri in stream: 515 yield transition, uri 516 for transition, uri in filter(context, 517 self.source.difference(store)): 500 518 if transition == REMOVED: 501 519 self.discard(uri) … … 554 572 # Helper methods 555 573 def _load_plugin(self, type, uri, args={}): 556 from pyndexter.util import uri_parse574 from pyndexter.util import URI 557 575 # Extract URI components 558 scheme, username, password, netloc, path, query, fragment = \559 uri_parse(uri)560 u sername = usernameor None561 password = password or None562 uri_components = {'username': username, 'password': password,563 ' netloc': netloc, 'path': path, 'fragment':fragment}576 uri = URI(uri) 577 uri.username = uri.username or None 578 
uri.password = uri.password or None 579 uri_components = {'username': uri.username, 'password': uri.password, 580 'host': uri.host, 'path': uri.path, 581 'fragment': uri.fragment} 564 582 # Discard them if they're empty 565 583 uri_components = dict([(k, v) for k, v in uri_components.iteritems() if v]) 566 query.update(uri_components) 567 query.update(args) 568 module = __import__('pyndexter.%ss.%s' % (type, scheme), 569 {}, {}, ['']) 584 uri.query.update(uri_components) 585 uri.query.update(args) 586 try: 587 module_name = 'pyndexter.%ss.%s' % (type, uri.scheme) 588 module = __import__(module_name, {}, {}, ['']) 589 except ImportError, e: 590 raise InvalidModule(module_name, e) 570 591 indexer_factory = getattr(module, type + '_factory') 571 592 assert isinstance(indexer_factory, ComponentFactory) 572 return indexer_factory(self, ** query)593 return indexer_factory(self, **uri.query) 573 594 574 595 def _assert_rw(self): … … 614 635 __slots__ = ('attributes', '_document') 615 636 616 def __init__(self, uri, document=None, score=None,**attributes):637 def __init__(self, uri, document=None, **attributes): 617 638 self._document = document 618 639 self.attributes = attributes 619 self.attributes .update({'uri': uri, 'score': score})640 self.attributes['uri'] = uri 620 641 621 642 def __getattr__(self, key): … … 625 646 raise AttributeError(unicode(e)) 626 647 648 def __contains__(self, key): 649 return key in self.attributes 650 627 651 def _get_document(self): 628 652 if callable(self._document): pyndexter/branches/refactoring/pyndexter/util.py
r364 r365 8 8 9 9 import time 10 import re 10 11 from UserDict import DictMixin 11 12 try: … … 57 58 58 59 59 def uri_parse(uri):60 """ Parse a URI into its component parts. The query is passed through61 `cgi.parse_qs()`.60 class URI(object): 61 """Parse a URI into its component parts. The `query` component is passed 62 through `cgi.parse_qs()`. 62 63 63 scheme://username:password@netloc/path?query#fragment64 scheme://username:password@host/path?query#fragment 64 65 65 TODO: Support "parameters???" Never seen this: 66 scheme://username:password@netloc/path;parameters?query#fragment 66 Each component is available as an attribute of the object. 67 68 TODO: Support "parameters???" Never seen this in the wild: 69 scheme://username:password@host/path;parameters?query#fragment 67 70 68 71 PS. `urlparse` is not useful. """ 69 from cgi import parse_qs70 import re71 72 72 global uri_parse 73 if not hasattr(uri_parse, '_pattern'): 74 uri_parse._pattern = re.compile(r'(?P<scheme>[^:]+)://(?:(?P<username>[^:@]*)(?::(?P<password>[^@]*))?@)?(?P<netloc>[^/]*)(?P<path>/[^#?]*)?(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?') 73 _pattern = re.compile(r'(?P<scheme>[^:]+)://(?:(?P<username>[^:@]*)(?::(?P<password>[^@]*))?@)?(?P<host>[^/]*)(?P<path>/[^#?]*)?(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?') 75 74 76 match = uri_parse._pattern.match(uri) 77 if match is None: 78 raise ValueError('Invalid URI') 79 groups = match.groups() 80 return groups[0:5] + (parse_qs(groups[5] or ''),) + groups[6:] 75 __slots__ = ('scheme', 'username', 'password', 'host', 'path', 'query', 76 'fragment') 77 78 def __init__(self, uri=None): 79 if uri is not None: 80 from cgi import parse_qs 81 82 match = self._pattern.match(uri) 83 if match is None: 84 raise ValueError('Invalid URI') 85 groups = match.groups() 86 groups = groups[0:5] + (parse_qs(groups[5] or ''),) + groups[6:] 87 groups = [group or '' for group in groups] 88 if not groups[5]: 89 groups[5] = {} 90 else: 91 groups = [''] * 7 92 self.scheme, 
self.username, self.password, self.host, self.path, \ 93 self.query, self.fragment = groups 94 95 def __repr__(self): 96 uri = self.scheme + '://' 97 if self.username or self.password: 98 if self.username: 99 uri += self.username 100 if self.password: 101 uri += ':' + self.password 102 uri += '@' 103 uri += self.host + self.path 104 if self.query: 105 uri += '?' + '&'.join(['&'.join(['%s=%s' % (k, v) for v in l]) 106 for k, l in self.query.items()]) 107 if self.fragment: 108 uri += '#' + self.fragment 109 return uri pyndexter/branches/refactoring/.todo
r364 r365 31 31 </note> 32 32 <note priority="medium" time="1168868728"> 33 Add slicing on Search objects. This will allow fast pagination in result displays. 33 Add slicing to Result objects. This will allow fast pagination in result displays. 34 34 </note> 35 35 <note priority="low" time="1168875038">
