root/pyndexter/trunk/pyndexter/indexers/_hyperestraier.py

Revision 453, 3.8 kB (checked in by athomas, 1 year ago)

pyndexter: Moved to hyperestraier pure-Python module, fixed setup.py.

Line 
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2006 Alec Thomas <alec@swapoff.org>
4 #
5 # This software is licensed as described in the file COPYING, which
6 # you should have received as part of this distribution.
7 #
8
9 """
10 Hyperestraier
11 -------------
12
13 Adapter for Hyperestraier_ using the pure-Python ``hyperestraier`` module.
14
15 .. _Hyperestraier: http://hyperestraier.sourceforge.net/
16
17 Usage
18 ~~~~~
19
20 ::
21
22     hyperestraier://<path>?hype_mode=<int>
23
24 ``hype_mode`` (default: auto)
25     Override the default ``READONLY``/``READWRITE`` modes in Pyndexter and use
26     Hyperestraier database open modes. See the Hyperestraier docs for details.
27
28 Installation
29 ~~~~~~~~~~~~
30
31 Install your distribution's Hyperestraier package (typically the package
32 ``hyperestraier``).
33
34 If your distribution also includes the SWIG bindings as packages, install
35 these, otherwise:
36
37 ::
38
39     wget http://hyperestraier.sourceforge.net/binding/hyper_estraier_wrappers-0.0.15.tar.gz
40     tar xfzv hyper_estraier_wrappers-0.0.15.tar.gz
41     cd hyper_estraier_wrappers-0.0.15
42     make
43     make install
44 """
45
46 import os
47 import hyperestraier
48 from pyndexter import *
49
50
# Names exported by ``from <this module> import *``.
__all__ = ['HyperestraierIndexer', 'HyperestraierResult']
52
53
class HyperestraierIndexer(Indexer):
    """Pyndexter adapter for the Hyperestraier indexer.

    All operations are delegated to a ``hyperestraier.Node`` object that is
    configured with an HTTP URL and credentials in ``__init__``.
    """

    def __init__(self, framework, host, path, username='admin',
                 password='admin', port=1978):
        """Create the node client and point it at the Hyperestraier node.

        :param framework: Pyndexter framework, passed to ``Indexer.__init__``.
        :param host: Host name used to build the node URL.
        :param path: Path component of the node URL.
        :param username: User name passed to ``Node.set_auth``.
        :param password: Password passed to ``Node.set_auth``.
        :param port: TCP port used to build the node URL.
        """
        Indexer.__init__(self, framework)

        self.db = hyperestraier.Node()
        self.db.set_url('http://%s:%s/%s' % (host, port, path))
        self.db.set_auth(username, password)

    def index(self, document):
        """Store ``document`` in the node.

        Every entry of ``document.attributes`` is stored under its name
        prefixed with ``@``; the content is added one line at a time.
        """
        hdoc = hyperestraier.Document()
        for k, v in document.attributes.iteritems():
            hdoc.add_attr(u'@' + k, v)
        for line in document.content.splitlines():
            hdoc.add_text(line)
        # NOTE(review): the result of put_doc() is not checked, so a failed
        # store goes unnoticed here.
        self.db.put_doc(hdoc, 1)

    def discard(self, uri):
        """Remove the document identified by ``uri``.

        :raises DocumentNotFound: if the node reports that nothing was
            removed.
        """
        uri = unicode(uri)
        if not self.db.out_doc_by_uri(uri):
            raise DocumentNotFound(uri)

    def fetch(self, uri):
        """Return the stored ``Document`` for ``uri``.

        :raises DocumentNotFound: if the node has no document for ``uri``.
        """
        uri = unicode(uri)
        doc = self.db.get_doc_by_uri(uri)
        # Assumes the node client returns None for an unknown URI (TODO
        # confirm).  Raise the same error as discard() instead of failing
        # later with an AttributeError on None.
        if doc is None:
            raise DocumentNotFound(uri)
        attributes = self._translate_attributes(doc)
        return Document(content=u'\n'.join(doc.texts()),
                        quality=0.99,
                        **attributes)

    def search(self, query):
        """Run ``query`` against the node and return a result object.

        The query is rendered with ``query.as_string`` using ``'ANDNOT '``
        for negation and handed to ``hype_search``.
        """
        phrase = query.as_string(not_='ANDNOT ')
        return self.hype_search(phrase, query, simple=False)

    def optimise(self):
        """Delegate to the node's ``optimize()``."""
        self.db.optimize()

    def flush(self):
        """Delegate to the node's ``sync()``."""
        self.db.sync()

    def close(self):
        """Close the node connection; calling it again is a no-op."""
        if self.db is not None:
            self.db.close()
            self.db = None

    # Hyperestraier-specific methods
    def hype_search(self, phrase, query, simple=True, order=None):
        """Search the node with a full Hyperestraier search phrase.

        :param phrase: Phrase set on the ``hyperestraier.Condition``.
        :param query: Original query, passed through to the result object.
        :param simple: Accepted for API compatibility; currently unused.
        :param order: Accepted for API compatibility; currently unused.
        :returns: A ``HyperestraierResult`` wrapping the node's search
            result, or wrapping an empty list when the search returned a
            false value.
        """
        cond = hyperestraier.Condition()
        cond.set_phrase(phrase)
        search = self.db.search(cond, 0)
        return HyperestraierResult(self, query, search or [])

    # Internal methods
    def _translate_attributes(self, hdoc):
        """Build a plain attribute dictionary from a Hyperestraier document.

        Attribute names are UTF-8 encoded and a leading ``@`` is stripped.
        The ``uri`` entry is wrapped in ``URI``.
        """
        attributes = {}
        for name in hdoc.attr_names():
            # Presumably encoded so the names can be used as keyword
            # arguments by callers (``**attributes``) -- TODO confirm.
            key = name.encode('utf-8')
            value = hdoc.attr(key)
            # startswith() also copes with an empty attribute name, where
            # indexing the first character would raise IndexError.
            if key.startswith('@'):
                key = key[1:]
            attributes[key] = value
        attributes['uri'] = URI(attributes['uri'])
        return attributes
118
119
# Factory object wrapping HyperestraierIndexer.
# NOTE(review): presumably the name pyndexter's plugin loader looks up in
# this module -- confirm against the loader.
indexer_factory = PluginFactory(HyperestraierIndexer)
121
122
class HyperestraierResult(Result):
    """Search result wrapping a Hyperestraier node result.

    ``self.context`` is whatever ``HyperestraierIndexer.hype_search`` passed
    in: the object returned by the node's ``search()``, or an empty list when
    that call returned a false value.  Every accessor therefore also accepts
    a plain sequence.
    """

    def __iter__(self):
        """Yield a ``Hit`` for every document in the result."""
        # Fall back to the context itself when it is the empty-list
        # placeholder, which has no ``docs`` attribute.
        for doc in getattr(self.context, 'docs', self.context):
            yield self._translate(doc)

    def __len__(self):
        """Return the number of documents in the result."""
        doc_num = getattr(self.context, 'doc_num', None)
        if doc_num is None:
            return len(self.context)
        return doc_num()

    def __getitem__(self, index):
        """Return the ``Hit`` at position ``index``.

        :raises IndexError: if there is no document at ``index``.
        """
        get_doc = getattr(self.context, 'get_doc', None)
        if get_doc is None:
            doc = self.context[index]
        else:
            # get_doc is a method and has to be called; subscripting it
            # raises TypeError.
            doc = get_doc(index)
        # Assumes get_doc() returns None for an out-of-range index -- TODO
        # confirm against the hyperestraier module.
        if doc is None:
            raise IndexError(index)
        return self._translate(doc)

    # Internal methods
    def _translate(self, doc):
        """Convert a Hyperestraier result document into a ``Hit``."""
        return Hit(current=self.indexer.framework.fetch,
                   indexed=self.indexer.fetch,
                   **self.indexer._translate_attributes(doc))
Note: See TracBrowser for help on using the browser.