root/pyndexter/trunk/pyndexter/indexers/_hype.py

Revision 452, 4.6 kB (checked in by athomas, 1 year ago)

pyndexter: All modules are now prefixed with _ to avoid import collisions. Updated unit tests.

  • Property svn:eol-style set to native
Line 
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2006 Alec Thomas <alec@swapoff.org>
4 #
5 # This software is licensed as described in the file COPYING, which
6 # you should have received as part of this distribution.
7 #
8
9 """
10 Hype
11 ----
12
13 Adapter for Hyperestraier using the Hype bindings.
14
15 Hype_ is a Python wrapper for Hyperestraier_. Hype is only available through
16 SVN, but is quite stable and functional.
17
18 .. _Hype: http://hype.python-hosting.com
19 .. _Hyperestraier: http://hyperestraier.sourceforge.net/
20
21 Usage
22 ~~~~~
23
24 ::
25
26     hype://<path>?hype_mode=<int>&enable_scoring=<bool>
27
28
29 ``hype_mode`` (default: auto)
30     Override the default ``READONLY``/``READWRITE`` modes in Pyndexter and use
31     Hyperestraier database open modes. See the Hyperestraier docs for details.
32
33 ``enable_scoring`` (default: ``true``)
34     Put Hyperestraier into a debug mode where scores are returned. This is
35     apparently somewhat slower, but I have not observed a massive difference.
36
37 Installation
38 ~~~~~~~~~~~~
39
40 Install your distribution's Hyperestraier package.
41
42 ::
43
44     svn co http://svn.hype.python-hosting.com/trunk hype
45     cd hype
46     python setup.py install
47 """
48
49 import os
50 from pyndexter import *
51 import hype
52
53
54 __all__ = ['HypeIndexer', 'HypeResult']
55
56
class HypeIndexer(Indexer):
    """Indexer adapter that stores documents in a Hyperestraier database.

    The database is opened through the ``hype`` bindings and lives at
    ``<path>/hype.db``.
    """

    def __init__(self, framework, path, hype_mode=0, enable_scoring=True):
        """Open the Hyperestraier database, creating it when permitted.

        :param framework: owning framework; its ``mode`` (``READONLY`` or
            ``READWRITE``) selects the reader/writer open flag.
        :param path: directory that holds the index files.
        :param hype_mode: extra Hyperestraier open-mode flags, OR-ed with the
            flags derived from ``framework.mode``.
        :param enable_scoring: handed to every ``HypeResult`` built by
            ``search()``.
        """
        Indexer.__init__(self, framework)
        self.path = path
        self.hype_path = os.path.join(self.path, 'hype.db')
        self.state_path = os.path.join(self.path, 'state.db')
        self.enable_scoring = enable_scoring

        if framework.mode == READONLY:
            hype_mode |= hype.ESTDBREADER
        elif framework.mode == READWRITE:
            hype_mode |= hype.ESTDBWRITER
            if not os.path.exists(self.hype_path):
                hype_mode |= hype.ESTDBCREAT
                # The containing directory may already exist even though the
                # database does not; os.makedirs() raises OSError in that
                # case, so only create it when it is really missing.
                if not os.path.isdir(self.path):
                    os.makedirs(self.path)

        self.db = hype.Database(self.hype_path, hype_mode)

    def index(self, document):
        """Store ``document`` in the database.

        Every attribute except ``uri`` is saved under an ``@``-prefixed key
        (the URI itself is given to the ``hype.Document`` constructor), and
        the content is added one line at a time.
        """
        hdoc = hype.Document(unicode(document.uri))
        for k, v in document.attributes.iteritems():
            if k != 'uri':
                hdoc['@' + k] = unicode(v)
        for line in document.content.splitlines():
            hdoc.add_text(line)
        self.db.put_doc(hdoc)

    def discard(self, uri):
        """Remove the document stored under ``uri``.

        :raises DocumentNotFound: if no document is stored for ``uri``.
        """
        doc = self.db.get_doc_by_uri(unicode(uri))
        if not doc:
            raise DocumentNotFound(uri)
        self.db.remove(doc)

    def search(self, query):
        """Run ``query`` against the database and return a ``HypeResult``."""
        # The query is rendered with 'ANDNOT ' as its negation operator and
        # decoded from UTF-8 before being passed to the database.
        qs = query.as_string(not_='ANDNOT ').decode('utf-8')
        search = self.db.search(qs)
        return HypeResult(self, query, search, self.enable_scoring)

    def optimise(self):
        """Ask the underlying database to optimise itself."""
        self.db.optimize()

    def fetch(self, uri):
        """Return the indexed copy of the document stored under ``uri``.

        :raises DocumentNotFound: if no document is stored for ``uri``.
        """
        doc = self.db.get_doc_by_uri(unicode(uri))
        if not doc:
            raise DocumentNotFound(uri)
        attributes = self._translate_attributes(doc)
        return Document(content='\n'.join(doc.texts), quality=0.99, **attributes)

    def flush(self):
        """Call ``sync()`` and then ``flush()`` on the database."""
        self.db.sync()
        self.db.flush()

    def close(self):
        """Drop the reference to the database object."""
        # NOTE(review): no explicit close call is made on the database here;
        # presumably releasing the object is what closes it -- confirm
        # against the hype bindings.
        self.db = None

    # Internal methods
    def _translate_attributes(self, hdoc):
        """Build an attribute dict from a hype document.

        A leading ``@`` is stripped from attribute names and the ``uri``
        entry is wrapped in ``URI``.
        """
        attributes = {}
        for k in hdoc.attributes:
            # startswith() rather than k[0] so that an empty attribute name
            # cannot raise IndexError.
            if k.startswith('@'):
                attributes[k[1:]] = hdoc.get(k)
            else:
                attributes[k] = hdoc.get(k)
        attributes['uri'] = URI(attributes['uri'])
        return attributes
122
123
# Module-level factory for HypeIndexer. The keyword arguments pair each
# supported option name with a Python type.
# NOTE(review): presumably PluginFactory uses these types to convert the
# query-string options shown in the module docstring -- confirm.
indexer_factory = PluginFactory(
    HypeIndexer,
    enable_scoring=bool,
    hype_mode=int,
)
126
127
class HypeResult(Result):
    """Search result wrapper around a hype search object.

    When scoring is enabled, the search is switched into its score-returning
    form and every hit carries a ``score`` attribute.
    """

    def __init__(self, indexer, query, context, enable_scoring=True):
        """Wrap the hype search ``context`` produced for ``query``.

        :param indexer: the ``HypeIndexer`` that ran the search.
        :param query: the query object that was searched for.
        :param context: hype search object returned by the database.
        :param enable_scoring: if true, request scores from the search.
        """
        self.enable_scoring = enable_scoring
        if enable_scoring:
            context = context.scores().option(hype.ESTCONDSCFB)
        Result.__init__(self, indexer, query, context)

    def __iter__(self):
        """Yield a ``Hit`` for each document in the search."""
        if self.enable_scoring:
            # With scoring enabled the search yields (document, score) pairs.
            for doc, score in self.context:
                yield self._translate(doc, score)
        else:
            for doc in self.context:
                yield self._translate(doc)

    def __len__(self):
        """Return the length reported by the underlying search."""
        return len(self.context)

    def __getitem__(self, index):
        """Return the ``Hit`` at position ``index``."""
        # NOTE(review): the [0] assumes indexing the search yields a sequence
        # whose first element is the document -- confirm this also holds
        # when scoring is disabled.
        doc = self.context[index][0]
        if self.enable_scoring:
            score = self.context.get_score(index)
        else:
            score = None
        return self._translate(doc, score, index)

    # Internal methods
    def _translate(self, doc, score=None, index=None):
        """Convert a hype document into a ``Hit``.

        :param doc: hype document to translate.
        :param score: score for the document, if already known.
        :param index: position of the document in the search; used to look
            the score up when scoring is enabled and ``score`` was not given.
        """
        attrs = self.indexer._translate_attributes(doc)
        if self.enable_scoring:
            # The score can only be looked up when the caller supplied the
            # position; the previous code referenced an undefined ``index``
            # name here and raised NameError.
            if score is None and index is not None:
                score = self.context.get_score(index)
            attrs['score'] = score
        return Hit(current=self.indexer.framework.fetch,
                   indexed=self.indexer.fetch, **attrs)
Note: See TracBrowser for help on using the browser.