root/pyndexter/trunk/pyndexter/indexers/_lupy.py

Revision 452, 3.3 kB (checked in by athomas, 1 year ago)

pyndexter: All modules are now prefixed with _ to avoid import collisions. Updated unit tests.

  • Property svn:eol-style set to native
Line 
1 # -*- coding: utf-8 -*-
2 #
3 # Copyright (C) 2006 Alec Thomas <alec@swapoff.org>
4 #
5 # This software is licensed as described in the file COPYING, which
6 # you should have received as part of this distribution.
7 #
8
9 """
10 Lupy
11 ----
12
13 Lupy_ is a (deprecated) pure-Python indexer. It is excruciatingly slow,
14 presumably because of its desire to be compatible with Lucene. Included
15 as an exercise mostly :)
16
17 .. _Lupy: http://www.divmod.org/projects/lupy
18
19 Usage
20 ~~~~~
21
22 ::
23
24     lupy://<path>
25
26 Installation
27 ~~~~~~~~~~~~
28
29 ::
30
31     easy_install http://gentoo.prz.rzeszow.pl/distfiles/Lupy-0.2.1.tar.gz
32 """
33
34 import os
35 from pyndexter import *
36 import lupy
37 import lupy.indexer
38 import lupy.search
39
40
class LupyIndexer(Indexer):
    """Pyndexter indexer backend that keeps its index in a Lupy database.

    The Lupy index lives in ``<path>/lupy.db``. Document content is indexed
    under the ``text`` field, which is the only field queries are compiled
    against; document attributes are passed to Lupy with an underscore
    prepended to their names.
    """

    def __init__(self, framework, path):
        """Open the Lupy index stored under `path`.

        The directory and the index itself are only created when the
        framework is in ``READWRITE`` mode and they do not exist yet.
        """
        Indexer.__init__(self, framework)
        self.path = path
        self.db_path = os.path.join(self.path, 'lupy.db').encode('utf-8')
        self.state_path = os.path.join(self.path, 'state.db')
        writable = framework.mode == READWRITE
        if writable and not os.path.exists(self.path):
            os.makedirs(self.path)
        create = writable and not os.path.exists(self.db_path)
        self.db = lupy.indexer.Index(self.db_path, create=create)

    def index(self, document):
        """Add `document` to the index, replacing any entry with its URI.

        Attributes whose value is ``None`` are skipped; all other values are
        converted with ``unicode()``.
        """
        # NOTE(review): the leading underscore presumably tells Lupy to store
        # the attribute as an untokenised keyword field -- confirm against
        # lupy.indexer.Index.index.
        attributes = dict(('_' + k.encode('utf-8'), unicode(v))
                          for k, v in document.attributes.iteritems()
                          if v is not None)
        # Drop any previous entry first so re-indexing does not duplicate it.
        self.discard(uri=document.uri)
        self.db.index(text=document.content, **attributes)

    def discard(self, uri):
        """Delete the entry whose ``uri`` field equals `uri`."""
        self.db.delete(uri=unicode(uri))

    def search(self, query):
        """Run the pyndexter `query` tree and return a `LupyResult`."""
        lupy_query = lupy.indexer.BooleanQuery()
        self._compile_query(query, (True, False), lupy_query)
        searcher = lupy.search.indexsearcher.IndexSearcher(self.db_path)
        hits = searcher.search(lupy_query)
        return LupyResult(self, query, hits)

    def optimise(self):
        """Ask Lupy to optimise the index."""
        self.db.optimize()

    def close(self):
        """Close the underlying Lupy index."""
        self.db.close()

    # Internal methods
    def _compile_query(self, node, op, query):
        """Add the clauses for the query tree rooted at `node` to `query`.

        `op` is the flag pair passed on to ``query.add()`` for whatever
        `node` contributes; the pairs used here follow the Lucene-style
        ``(required, prohibited)`` convention: ``(True, False)`` for AND
        operands, ``(False, False)`` for OR operands and ``(False, True)``
        for the operand of NOT.

        An AND/OR node whose operands need different flags from the node
        itself is compiled into a nested boolean query, so that `op` applies
        to the sub-expression as a whole. Without that, ``NOT (a OR b)``
        would degrade to two optional terms and ``a OR (b AND c)`` would
        make ``b`` and ``c`` required for every match.

        Returns True if at least one clause was added to `query`. Raises
        NotImplementedError for node types that are not handled.
        """
        if not node or node.type == node.NULL:
            return False

        if node.type == node.TERM:
            term = lupy.indexer.Term('text', node.value)
            query.add(lupy.indexer.TermQuery(term), *op)
            return True

        if node.type == node.NOT:
            # Negation is expressed by prohibiting whatever the operand is.
            return self._compile_query(node.left, (False, True), query)

        if node.type == node.AND:
            child_op = (True, False)
        elif node.type == node.OR:
            child_op = (False, False)
        else:
            raise NotImplementedError

        if child_op == op:
            # Operands carry the same flags as the node itself, so they can
            # join the enclosing query directly without changing its meaning.
            left = self._compile_query(node.left, child_op, query)
            right = self._compile_query(node.right, child_op, query)
            return left or right

        group = lupy.indexer.BooleanQuery()
        left = self._compile_query(node.left, child_op, group)
        right = self._compile_query(node.right, child_op, group)
        if not (left or right):
            # Never attach an empty group: as a required clause it could
            # prevent every document from matching.
            return False
        query.add(group, *op)
        return True
93
94
# Module-level factory wrapping LupyIndexer.
# NOTE(review): presumably this is the name the pyndexter plugin loader looks
# up to build the indexer for ``lupy://`` URIs -- confirm against PluginFactory.
indexer_factory = PluginFactory(LupyIndexer)
96
class LupyResult(Result):
    """Search result that turns Lupy hits into pyndexter ``Hit`` objects.

    ``self.context`` is iterated, indexed and asked for scores; it is
    presumably the Lupy hits object handed over by ``LupyIndexer.search``.
    """

    def __iter__(self):
        """Yield one ``Hit`` per entry in ``self.context``, in order."""
        for position, document in enumerate(self.context):
            yield self._translate(position, document)

    def __getitem__(self, index):
        """Return the ``Hit`` for the entry at `index`."""
        document = self.context[index]
        return self._translate(index, document)

    # Internal methods
    def _translate(self, index, doc):
        """Build a ``Hit`` from the Lupy document `doc` found at `index`.

        Every field of `doc` becomes a keyword argument of the ``Hit``, plus
        ``score`` (looked up by `index`) and ``uri`` wrapped in ``URI``.
        """
        fields = {}
        for name in doc.fieldNames:
            # Keyword-argument names must be plain strings.
            fields[str(name)] = doc.get(name)
        fields['score'] = self.context.score(index)
        fields['uri'] = URI(fields['uri'])
        return Hit(current=self.indexer.framework.fetch,
                   indexed=self.indexer.fetch,
                   **fields)
Note: See TracBrowser for help on using the browser.