| 1 |
# -*- coding: utf-8 -*- |
|---|
| 2 |
# |
|---|
| 3 |
# Copyright (C) 2006 Alec Thomas <alec@swapoff.org> |
|---|
| 4 |
# |
|---|
| 5 |
# This software is licensed as described in the file COPYING, which |
|---|
| 6 |
# you should have received as part of this distribution. |
|---|
| 7 |
# |
|---|
| 8 |
|
|---|
| 9 |
""" |
|---|
| 10 |
Hyperestraier |
|---|
| 11 |
------------- |
|---|
| 12 |
|
|---|
| 13 |
Adapter for Hyperestraier_ using the swigged bindings. |
|---|
| 14 |
|
|---|
| 15 |
.. _Hyperestraier: http://hyperestraier.sourceforge.net/ |
|---|
| 16 |
|
|---|
| 17 |
Usage |
|---|
| 18 |
~~~~~ |
|---|
| 19 |
|
|---|
| 20 |
:: |
|---|
| 21 |
|
|---|
| 22 |
hyperestraier://<path>?hype_mode=<int> |
|---|
| 23 |
|
|---|
| 24 |
``hype_mode`` (default: auto) |
|---|
| 25 |
Override the default ``READONLY``/``READWRITE`` modes in Pyndexter and use |
|---|
| 26 |
Hyperestraier database open modes. See the Hyperestraier docs for details. |
|---|
| 27 |
|
|---|
| 28 |
Installation |
|---|
| 29 |
~~~~~~~~~~~~ |
|---|
| 30 |
|
|---|
| 31 |
Install your distribution's Hyperestraier package (typically the package |
|---|
| 32 |
``hyperestraier``). |
|---|
| 33 |
|
|---|
| 34 |
If your distribution also includes the SWIG bindings as packages, install |
|---|
| 35 |
these, otherwise: |
|---|
| 36 |
|
|---|
| 37 |
:: |
|---|
| 38 |
|
|---|
| 39 |
wget http://hyperestraier.sourceforge.net/binding/hyper_estraier_wrappers-0.0.15.tar.gz |
|---|
| 40 |
tar xfzv hyper_estraier_wrappers-0.0.15.tar.gz |
|---|
| 41 |
cd hyper_estraier_wrappers-0.0.15 |
|---|
| 42 |
make |
|---|
| 43 |
make install |
|---|
| 44 |
""" |
|---|
| 45 |
|
|---|
| 46 |
import os |
|---|
| 47 |
import hyperestraier |
|---|
| 48 |
from pyndexter import * |
|---|
| 49 |
|
|---|
| 50 |
|
|---|
| 51 |
# Names exported by ``from <this module> import *``: the indexer adapter and
# its result wrapper.
__all__ = ['HyperestraierIndexer', 'HyperestraierResult'] |
|---|
| 52 |
|
|---|
| 53 |
|
|---|
| 54 |
class HyperestraierIndexer(Indexer):
    """ Pyndexter adapter for the Hyperestraier indexer.

    Documents are stored in, and searched through, a ``hyperestraier.Node``
    addressed by an ``http://<host>:<port>/<path>`` URL.
    """

    def __init__(self, framework, host, path, username='admin',
                 password='admin', port=1978):
        """ Configure the node connection.

        :param framework: Passed unchanged to ``Indexer.__init__``.
        :param host: Host name used to build the node URL.
        :param path: Path component of the node URL.
        :param username: User name handed to ``Node.set_auth``.
        :param password: Password handed to ``Node.set_auth``.
        :param port: Port used to build the node URL.
        """
        Indexer.__init__(self, framework)

        self.db = hyperestraier.Node()
        self.db.set_url('http://%s:%s/%s' % (host, port, path))
        self.db.set_auth(username, password)

    def index(self, document):
        """ Store ``document`` in the node.

        Every entry of ``document.attributes`` becomes a Hyperestraier
        attribute named ``@<key>``; ``document.content`` is added one line
        at a time.
        """
        hdoc = hyperestraier.Document()
        for k, v in document.attributes.iteritems():
            hdoc.add_attr(u'@' + k, v)
        for line in document.content.splitlines():
            hdoc.add_text(line)
        # NOTE(review): the meaning of the second argument (1) is defined by
        # the binding -- confirm against the wrapper's put_doc signature.
        self.db.put_doc(hdoc, 1)

    def discard(self, uri):
        """ Remove the document stored under ``uri``.

        :raises DocumentNotFound: if the node reports that nothing was
                                  removed.
        """
        uri = unicode(uri)
        if not self.db.out_doc_by_uri(uri):
            raise DocumentNotFound(uri)

    def fetch(self, uri):
        """ Return the indexed copy of the document stored under ``uri``.

        The content is rebuilt by joining the stored text lines with
        newlines; the stored attributes are passed as keyword arguments.

        :raises DocumentNotFound: if the node returns no document, matching
                                  the behaviour of ``discard``.
        """
        uri = unicode(uri)
        doc = self.db.get_doc_by_uri(uri)
        if doc is None:
            # Without this guard a missing document surfaced as an
            # AttributeError from _translate_attributes(None).
            raise DocumentNotFound(uri)
        attributes = self._translate_attributes(doc)
        return Document(content=u'\n'.join(doc.texts()),
                        quality=0.99,
                        **attributes)

    def search(self, query):
        """ Run ``query`` against the node and return a result object.

        The query is rendered to a phrase with ``'ANDNOT '`` as its negation
        operator before being handed to ``hype_search``.
        """
        phrase = query.as_string(not_='ANDNOT ')
        return self.hype_search(phrase, query, simple=False)

    def optimise(self):
        """ Ask the node to optimise its index. """
        self.db.optimize()

    def flush(self):
        """ Ask the node to synchronise pending updates. """
        self.db.sync()

    def close(self):
        """ Close the node connection and drop the reference to it.

        Calling ``close`` again once the connection is gone is a no-op.
        """
        if self.db is None:
            return
        self.db.close()
        self.db = None

    # Hyperestraier-specific methods
    def hype_search(self, phrase, query, simple=True, order=None):
        """ Full Hyperestraier search phrase.

        :param phrase: Search phrase set verbatim on the condition.
        :param query: Original query object, stored on the result.
        :param simple: Accepted for interface compatibility; currently
                       unused.
        :param order: Accepted for interface compatibility; currently
                      unused.
        :returns: A ``HyperestraierResult``; when the node search returns a
                  false value an empty list is used as the result context.
        """
        cond = hyperestraier.Condition()
        cond.set_phrase(phrase)
        # NOTE(review): the second argument (0) is presumably the node
        # search depth -- confirm against the binding.
        search = self.db.search(cond, 0)
        return HyperestraierResult(self, query, search or [])

    # Internal methods
    def _translate_attributes(self, hdoc):
        """ Convert the attributes of ``hdoc`` into a keyword dictionary.

        Attribute names are encoded as UTF-8 and a leading ``@`` is dropped
        from the resulting key. The ``uri`` entry is wrapped in ``URI``.

        :raises KeyError: if ``hdoc`` carries no ``uri`` attribute.
        """
        attributes = {}
        for k in hdoc.attr_names():
            k = k.encode('utf-8')
            # Slice rather than index so an empty name cannot raise.
            if k[:1] == '@':
                attributes[k[1:]] = hdoc.attr(k)
            else:
                attributes[k] = hdoc.attr(k)
        attributes['uri'] = URI(attributes['uri'])
        return attributes
|---|
| 118 |
|
|---|
| 119 |
|
|---|
| 120 |
# Module-level factory wrapping the indexer class in PluginFactory.
# NOTE(review): presumably the hook pyndexter uses to locate this adapter --
# confirm against pyndexter's plugin loading code.
indexer_factory = PluginFactory(HyperestraierIndexer) |
|---|
| 121 |
|
|---|
| 122 |
|
|---|
| 123 |
class HyperestraierResult(Result):
    """ Pyndexter result set backed by a Hyperestraier search result.

    ``self.context`` is either the object returned by the node search or,
    when ``HyperestraierIndexer.hype_search`` got a false value back, a
    plain (empty) list. A list context is treated as an ordinary sequence of
    result documents so that an empty result iterates, measures and indexes
    cleanly instead of raising ``AttributeError``.
    """

    def __iter__(self):
        """ Yield one ``Hit`` per result document. """
        if isinstance(self.context, list):
            docs = self.context
        else:
            docs = self.context.docs
        for doc in docs:
            yield self._translate(doc)

    def __len__(self):
        """ Return the number of result documents. """
        if isinstance(self.context, list):
            return len(self.context)
        return self.context.doc_num()

    def __getitem__(self, index):
        """ Return the ``Hit`` at position ``index``.

        :raises IndexError: if there is no result document at ``index``.
        """
        if isinstance(self.context, list):
            return self._translate(self.context[index])
        # get_doc is a method: the original subscripted it
        # (``get_doc[index]``), which always raised TypeError.
        doc = self.context.get_doc(index)
        if doc is None:
            # NOTE(review): assumes the binding returns None for an
            # out-of-range index -- confirm against the wrapper.
            raise IndexError(index)
        return self._translate(doc)

    # Internal methods
    def _translate(self, doc):
        """ Build a ``Hit`` from a result document.

        The hit receives the framework's and the indexer's ``fetch``
        callables plus the document's translated attributes.
        """
        return Hit(current=self.indexer.framework.fetch,
                   indexed=self.indexer.fetch,
                   **self.indexer._translate_attributes(doc))
|---|