Changeset 453
- Timestamp: 08/22/07 06:42:12 (11 months ago)
- Files:
  - pyndexter/trunk/pyndexter/indexers/_hyperestraier.py (modified) (4 diffs)
  - pyndexter/trunk/pyndexter/__init__.py (modified) (3 diffs)
  - pyndexter/trunk/setup.py (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/trunk/pyndexter/indexers/_hyperestraier.py
r452 r453 45 45 46 46 import os 47 import HyperEstraier47 import hyperestraier 48 48 from pyndexter import * 49 49 … … 54 54 class HyperestraierIndexer(Indexer): 55 55 """ Pyndexter adapter for the Hyperestraier indexer. """ 56 def __init__(self, framework, path, hype_mode=None): 56 def __init__(self, framework, host, path, username='admin', 57 password='admin', port=1978): 57 58 Indexer.__init__(self, framework) 58 self.hype_mode = hype_mode59 59 60 path = path.encode('utf-8') 61 self.path = path 62 self.db_path = os.path.join(path, 'hyperestraier.db').encode('utf-8') 63 self.state_path = os.path.join(path, 'state.db') 64 65 if framework.mode == READWRITE: 66 if not os.path.exists(self.path): 67 os.makedirs(self.path) 68 69 if self.hype_mode is None: 70 self.hype_mode = HyperEstraier.Database.DBREADER 71 if self.framework.mode == READWRITE: 72 self.hype_mode |= HyperEstraier.Database.DBWRITER|HyperEstraier.Database.DBCREAT 73 74 self.db = HyperEstraier.Database() 75 self.db.open(self.db_path, self.hype_mode) 60 self.db = hyperestraier.Node() 61 self.db.set_url('http://%s:%s/%s' % (host, port, path)) 62 self.db.set_auth(username, password) 76 63 77 64 def index(self, document): 78 hdoc = HyperEstraier.Document()65 hdoc = hyperestraier.Document() 79 66 for k, v in document.attributes.iteritems(): 80 hdoc.add_attr(unicode('@' + k).encode('utf-8'), 81 unicode(v).encode('utf-8')) 67 hdoc.add_attr(u'@' + k, v) 82 68 for line in document.content.splitlines(): 83 hdoc.add_text(line .encode('utf-8'))69 hdoc.add_text(line) 84 70 self.db.put_doc(hdoc, 1) 85 71 86 72 def discard(self, uri): 87 uuri = unicode(uri).encode('utf-8') 88 id = self.db.uri_to_id(uuri) 89 if id == -1: 73 uri = unicode(uri) 74 if not self.db.out_doc_by_uri(uri): 90 75 raise DocumentNotFound(uri) 91 self.db.out_doc(id, HyperEstraier.Database.ODCLEAN)92 76 93 77 def fetch(self, uri): 94 uuri = unicode(uri).encode('utf-8') 95 id = self.db.uri_to_id(uuri) 96 if id == -1: 97 raise DocumentNotFound(uri) 98 
doc = self.db.get_doc(id, 0) 78 uri = unicode(uri) 79 doc = self.db.get_doc_by_uri(uri) 99 80 attributes = self._translate_attributes(doc) 100 return Document(content=u'\n'.join([t.decode('utf-8') 101 for t in doc.texts()]), 81 return Document(content=u'\n'.join([t for t in doc.texts()]), 102 82 quality=0.99, 103 83 **attributes) … … 120 100 def hype_search(self, phrase, query, simple=True, order=None): 121 101 """ Full Hyperestraier search phrase. """ 122 cond = HyperEstraier.Condition()123 cond.set_phrase(phrase .encode('utf-8'))102 cond = hyperestraier.Condition() 103 cond.set_phrase(phrase) 124 104 search = self.db.search(cond, 0) 125 return HyperestraierResult(self, query, search )105 return HyperestraierResult(self, query, search or []) 126 106 127 107 # Internal methods … … 129 109 attributes = {} 130 110 for k in hdoc.attr_names(): 111 k = k.encode('utf-8') 131 112 if k[0] == '@': 132 attributes[k[1:]] = hdoc.attr(k) .decode('utf-8')113 attributes[k[1:]] = hdoc.attr(k) 133 114 else: 134 attributes[k] = hdoc.attr(k) .decode('utf-8')115 attributes[k] = hdoc.attr(k) 135 116 attributes['uri'] = URI(attributes['uri']) 136 117 return attributes 137 118 138 119 139 140 indexer_factory = PluginFactory(HyperestraierIndexer, hype_mode=int) 120 indexer_factory = PluginFactory(HyperestraierIndexer) 141 121 142 122 143 123 class HyperestraierResult(Result): 144 124 def __iter__(self): 145 for id in self.context:146 yield self._translate( id)125 for doc in self.context.docs: 126 yield self._translate(doc) 147 127 148 128 def __len__(self): 149 return len(self.context)129 return self.context.doc_num() 150 130 151 131 def __getitem__(self, index): 152 return self._translate(self.context [index])132 return self._translate(self.context.get_doc[index]) 153 133 154 134 # Internal methods 155 def _translate(self, id): 156 doc = self.indexer.db.get_doc(id, 0) 135 def _translate(self, doc): 157 136 return Hit(current=self.indexer.framework.fetch, 158 137 
indexed=self.indexer.fetch, pyndexter/trunk/pyndexter/__init__.py
r452 r453 225 225 text = '%s(%s:"%s"' % (' ' * depth, node.value[0], node.value[1]) 226 226 else: 227 text = "%s(%s%s" % (' ' * depth, type_map[node.type], node.value and ' "%s"' % (node.value,) or "") 227 text = "%s(%s%s" % (' ' * depth, type_map[node.type], 228 node.value and ' "%s"' % (node.value,) or "") 228 229 if node.left or node.right: 229 230 text += "\n" … … 467 468 """Compact all words in a block of text.""" 468 469 469 def __init__(self, words_re=r e.compile(r'\w+'), stemmer=lambda w: w,470 def __init__(self, words_re=r'\w+', stemmer=lambda w: w, 470 471 min_word_length=3, max_word_length=64, unique=False, 471 472 split=False, lower=True): … … 485 486 words_re = re.compile(words_re, re.UNICODE) 486 487 self.words_re = words_re 487 self.stemmer = stemmer 488 self.stemmer = stemmer or lambda w: w 488 489 self.min_word_length = min_word_length 489 490 self.max_word_length = max_word_length pyndexter/trunk/setup.py
r401 r453 26 26 'Operating System :: OS Independent', 27 27 'Topic :: Software Development :: Libraries'], 28 packages=['pyndexter', 'pyndexter.indexers', 'pyndexter.s ources',29 'pyndexter. stemmers', 'pyndexter.tests'])28 packages=['pyndexter', 'pyndexter.indexers', 'pyndexter.stemmers', 29 'pyndexter.tests'])
