| 1 |
# -*- coding: utf-8 -*- |
|---|
| 2 |
# |
|---|
| 3 |
# Copyright (C) 2006 Alec Thomas <alec@swapoff.org> |
|---|
| 4 |
# |
|---|
| 5 |
# This software is licensed as described in the file COPYING, which |
|---|
| 6 |
# you should have received as part of this distribution. |
|---|
| 7 |
# |
|---|
| 8 |
|
|---|
| 9 |
""" |
|---|
| 10 |
Hype |
|---|
| 11 |
---- |
|---|
| 12 |
|
|---|
| 13 |
Adapter for Hyperestraier using the Hype bindings. |
|---|
| 14 |
|
|---|
| 15 |
Hype_ is a Python wrapper for Hyperestraier_. Hype is only available through |
|---|
| 16 |
SVN, but is quite stable and functional. |
|---|
| 17 |
|
|---|
| 18 |
.. _Hype: http://hype.python-hosting.com |
|---|
| 19 |
.. _Hyperestraier: http://hyperestraier.sourceforge.net/ |
|---|
| 20 |
|
|---|
| 21 |
Usage |
|---|
| 22 |
~~~~~ |
|---|
| 23 |
|
|---|
| 24 |
:: |
|---|
| 25 |
|
|---|
| 26 |
hype://<path>?hype_mode=<int>&enable_scoring=<bool> |
|---|
| 27 |
|
|---|
| 28 |
|
|---|
| 29 |
``hype_mode`` (default: auto) |
|---|
| 30 |
Override the default ``READONLY``/``READWRITE`` modes in Pyndexter and use |
|---|
| 31 |
Hyperestraier database open modes. See the Hyperestraier docs for details. |
|---|
| 32 |
|
|---|
| 33 |
``enable_scoring`` (default: ``true``) |
|---|
| 34 |
Put Hyperestraier into a debug mode where scores are returned. This is |
|---|
| 35 |
apparently somewhat slower, but I have not observed a massive difference. |
|---|
| 36 |
|
|---|
| 37 |
Installation |
|---|
| 38 |
~~~~~~~~~~~~ |
|---|
| 39 |
|
|---|
| 40 |
Install your distribution's Hyperestraier package. |
|---|
| 41 |
|
|---|
| 42 |
:: |
|---|
| 43 |
|
|---|
| 44 |
svn co http://svn.hype.python-hosting.com/trunk hype |
|---|
| 45 |
cd hype |
|---|
| 46 |
python setup.py install |
|---|
| 47 |
""" |
|---|
| 48 |
|
|---|
| 49 |
import os |
|---|
| 50 |
from pyndexter import * |
|---|
| 51 |
import hype |
|---|
| 52 |
|
|---|
| 53 |
|
|---|
| 54 |
__all__ = ['HypeIndexer', 'HypeResult'] |
|---|
| 55 |
|
|---|
| 56 |
|
|---|
| 57 |
class HypeIndexer(Indexer):
    """Pyndexter indexer that stores documents in a Hyperestraier database.

    The database is opened through the Hype bindings at ``<path>/hype.db``.
    """

    def __init__(self, framework, path, hype_mode=0, enable_scoring=True):
        """Open the Hyperestraier database below ``path``.

        ``framework`` is passed to ``Indexer.__init__``; its ``mode``
        (``READONLY`` or ``READWRITE``) selects the Hyperestraier open flags.
        ``hype_mode`` supplies additional open flags that are OR-ed with the
        ones derived from the framework mode.  ``enable_scoring`` is handed
        to every ``HypeResult`` produced by ``search()``.
        """
        Indexer.__init__(self, framework)
        self.path = path
        self.hype_path = os.path.join(self.path, 'hype.db')
        self.state_path = os.path.join(self.path, 'state.db')
        self.enable_scoring = enable_scoring

        if framework.mode == READONLY:
            hype_mode |= hype.ESTDBREADER
        elif framework.mode == READWRITE:
            hype_mode |= hype.ESTDBWRITER
            # NOTE(review): creation is assumed to apply to READWRITE mode
            # only -- confirm against the upstream file.
            if not os.path.exists(self.hype_path):
                hype_mode |= hype.ESTDBCREAT
                # The directory can already exist without a database in it;
                # an unconditional os.makedirs() would raise OSError then.
                if not os.path.isdir(self.path):
                    os.makedirs(self.path)

        self.db = hype.Database(self.hype_path, hype_mode)

    def index(self, document):
        """Add ``document`` to the database.

        The document URI identifies the Hype document.  Every other
        attribute is stored under its name prefixed with ``@``, and the
        content is added one line at a time.
        """
        hdoc = hype.Document(unicode(document.uri))
        for k, v in document.attributes.iteritems():
            # The URI was already given to the Document constructor.
            if k != 'uri':
                hdoc['@' + k] = unicode(v)
        for line in document.content.splitlines():
            hdoc.add_text(line)
        self.db.put_doc(hdoc)

    def discard(self, uri):
        """Remove the document stored under ``uri``.

        Raises ``DocumentNotFound`` if the database has no such document.
        """
        doc = self.db.get_doc_by_uri(unicode(uri))
        if not doc:
            raise DocumentNotFound(uri)
        self.db.remove(doc)

    def search(self, query):
        """Run ``query`` and return the matches as a ``HypeResult``.

        The query is rendered to a string using ``ANDNOT`` as its negation
        operator and decoded from UTF-8 before being passed to Hype.
        """
        qs = query.as_string(not_='ANDNOT ').decode('utf-8')
        search = self.db.search(qs)
        return HypeResult(self, query, search, self.enable_scoring)

    def optimise(self):
        """Ask the underlying database to optimize itself."""
        self.db.optimize()

    def fetch(self, uri):
        """Return the indexed copy of ``uri`` as a ``Document``.

        The content is the stored text fragments joined with newlines.
        Raises ``DocumentNotFound`` if the database has no such document.
        """
        doc = self.db.get_doc_by_uri(unicode(uri))
        if not doc:
            raise DocumentNotFound(uri)
        attributes = self._translate_attributes(doc)
        return Document(content='\n'.join(doc.texts), quality=0.99,
                        **attributes)

    def flush(self):
        """Sync and then flush the underlying database."""
        self.db.sync()
        self.db.flush()

    def close(self):
        """Drop the reference to the underlying database."""
        # NOTE(review): no explicit close call is made; presumably Hype
        # closes the database when the object is collected -- confirm.
        self.db = None

    # Internal methods
    def _translate_attributes(self, hdoc):
        """Return the attributes of ``hdoc`` as a dict of Pyndexter names.

        A leading ``@`` is stripped from attribute names and the ``uri``
        entry is wrapped in a ``URI`` object.
        """
        attributes = {}
        for k in hdoc.attributes:
            # startswith() also copes with an empty attribute name, where
            # k[0] would raise IndexError.
            if k.startswith('@'):
                attributes[k[1:]] = hdoc.get(k)
            else:
                attributes[k] = hdoc.get(k)
        attributes['uri'] = URI(attributes['uri'])
        return attributes
|---|
| 122 |
|
|---|
| 123 |
|
|---|
| 124 |
# Plugin entry point for this adapter.
# NOTE(review): the keyword arguments are presumably converters applied to
# the string options of the hype:// URI -- confirm against PluginFactory.
# If so, bool() maps every non-empty string (including 'false') to True.
indexer_factory = PluginFactory(
    HypeIndexer,
    hype_mode=int,
    enable_scoring=bool
)
|---|
| 126 |
|
|---|
| 127 |
|
|---|
| 128 |
class HypeResult(Result):
    """Result set wrapping a Hype search, optionally with scores."""

    def __init__(self, indexer, query, context, enable_scoring=True):
        """Wrap the Hype search object ``context``.

        With ``enable_scoring`` the search is switched to its scores view
        with the ``ESTCONDSCFB`` option set before being handed to
        ``Result.__init__``.
        """
        self.enable_scoring = enable_scoring
        if enable_scoring:
            context = context.scores().option(hype.ESTCONDSCFB)
        Result.__init__(self, indexer, query, context)

    def __iter__(self):
        """Yield one ``Hit`` per matching document."""
        if self.enable_scoring:
            # The scores view yields (document, score) pairs.
            for doc, score in self.context:
                yield self._translate(doc, score)
        else:
            for doc in self.context:
                yield self._translate(doc)

    def __len__(self):
        """Return the length reported by the underlying search."""
        return len(self.context)

    def __getitem__(self, index):
        """Return the ``Hit`` at position ``index``."""
        # NOTE(review): the [0] is applied in both scoring modes, whereas
        # __iter__ only unpacks pairs when scoring is on -- confirm what
        # indexing a plain Hype search returns.
        doc = self.context[index][0]
        if self.enable_scoring:
            score = self.context.get_score(index)
        else:
            score = None
        return self._translate(doc, score)

    # Internal methods
    def _translate(self, doc, score=None, index=None):
        """Convert the Hype document ``doc`` into a ``Hit``.

        When scoring is enabled the hit carries a ``score`` attribute.  If
        no ``score`` is given it is looked up by ``index``, provided one was
        supplied; otherwise the score stays ``None``.
        """
        attrs = self.indexer._translate_attributes(doc)
        if self.enable_scoring:
            # The lookup used to reference an undefined ``index`` name and
            # raised NameError; it now only runs when the caller supplies a
            # position.
            if score is None and index is not None:
                score = self.context.get_score(index)
            attrs['score'] = score
        return Hit(current=self.indexer.framework.fetch,
                   indexed=self.indexer.fetch, **attrs)
|---|