| 1 |
# -*- coding: utf-8 -*- |
|---|
| 2 |
# |
|---|
| 3 |
# Copyright (C) 2006 Alec Thomas <alec@swapoff.org> |
|---|
| 4 |
# |
|---|
| 5 |
# This software is licensed as described in the file COPYING, which |
|---|
| 6 |
# you should have received as part of this distribution. |
|---|
| 7 |
# |
|---|
| 8 |
|
|---|
| 9 |
""" |
|---|
| 10 |
Pyndex |
|---|
| 11 |
------ |
|---|
| 12 |
|
|---|
| 13 |
Pyndex_ is a pure-Python indexer written |
|---|
| 14 |
by the busy Divmod folks. It is quite fast, but again, no longer supported. |
|---|
| 15 |
|
|---|
| 16 |
**Note:** Pyndex does not support document deletion. I have hacked around this |
|---|
| 17 |
by inserting an empty document, but this is obviously not ideal. |
|---|
| 18 |
|
|---|
| 19 |
.. _Pyndex: http://www.divmod.org/projects/pyndex |
|---|
| 20 |
|
|---|
| 21 |
Usage |
|---|
| 22 |
~~~~~ |
|---|
| 23 |
|
|---|
| 24 |
:: |
|---|
| 25 |
|
|---|
| 26 |
pyndex://<path> |
|---|
| 27 |
|
|---|
| 28 |
Installation |
|---|
| 29 |
~~~~~~~~~~~~ |
|---|
| 30 |
|
|---|
| 31 |
:: |
|---|
| 32 |
|
|---|
| 33 |
easy_install http://downloads.sourceforge.net/pyndex/Pyndex-0.3.2a.tar.gz |
|---|
| 34 |
""" |
|---|
| 35 |
|
|---|
| 36 |
import os |
|---|
| 37 |
from pyndexter import * |
|---|
| 38 |
import metakit |
|---|
| 39 |
import pyndex |
|---|
| 40 |
import pyndex.indexer |
|---|
| 41 |
|
|---|
| 42 |
|
|---|
| 43 |
class PyndexIndexer(Indexer):
    """Indexer backend that stores documents in a Pyndex index on Metakit.

    The index is kept in a ``pyndex.db`` file inside the directory given
    as ``path``.
    """

    def __init__(self, framework, path):
        """Open the Pyndex index located under ``path``.

        In ``READWRITE`` mode the directory is created when it does not
        exist and the Metakit storage is opened with mode ``1``; in any
        other mode the storage is opened with mode ``0``.
        """
        Indexer.__init__(self, framework)
        self.path = path
        self.pyndex_path = os.path.join(self.path, 'pyndex.db')
        # Recorded on the instance, but not read by any method of this class.
        self.state_path = os.path.join(self.path, 'state.db')

        storage_mode = 0
        if framework.mode == READWRITE:
            if not os.path.exists(self.path):
                os.makedirs(self.path)
            storage_mode = 1

        storage = metakit.storage(self.pyndex_path, storage_mode)
        self.db = pyndex.indexer.Index(storage)

    def index(self, document):
        """Index ``document.content`` under ``document.uri`` (both as UTF-8)."""
        key = unicode(document.uri).encode('utf-8')
        body = document.content.encode('utf-8')
        self.db.index(key, body)

    def discard(self, uri):
        """Blank out the document stored under ``uri``.

        The document is not removed: it is re-indexed with empty content,
        which is the workaround described in the module docstring.
        """
        # FIXME Is there a supported way of deleting documents? This is hackish.
        # FIXME Is there a way of storing attributes?
        key = unicode(uri).encode('utf-8')
        self.db.index(key, '')

    def search(self, query):
        """Run ``query`` against the index and return a ``PyndexResult``.

        All query terms are joined with single spaces into one UTF-8
        encoded string that is handed to Pyndex in a single call.
        """
        # FIXME Should probably do a search on each term, and perform set
        # operations.
        terms = query.terms()
        query_string = ' '.join(terms).encode('utf-8')
        raw_hits = self.db.find(query_string)
        return PyndexResult(self, query, raw_hits)

    def optimise(self):
        """Delegate to the underlying index's ``optimize()``."""
        self.db.optimize()

    def flush(self):
        """Commit pending changes on the underlying index."""
        self.db.commit()

    def close(self):
        """Close the underlying index."""
        self.db.close()


# Factory wrapping PyndexIndexer; presumably the entry point the pyndexter
# plugin machinery uses to build this backend -- confirm against PluginFactory.
indexer_factory = PluginFactory(PyndexIndexer)
|---|
| 83 |
|
|---|
| 84 |
class PyndexResult(Result):
    """Search result that exposes raw Pyndex hits as ``Hit`` objects.

    Reads the raw hits from ``self.context`` and the owning indexer from
    ``self.indexer``; both are presumably populated by the ``Result`` base
    class from the constructor arguments -- confirm against ``Result``.
    """

    def __iter__(self):
        """Lazily yield one translated ``Hit`` per raw hit in ``self.context``."""
        for raw_hit in self.context:
            yield self._translate(raw_hit)

    def __getitem__(self, index):
        """Return the translated ``Hit`` for ``self.context[index]``."""
        raw_hit = self.context[index]
        return self._translate(raw_hit)

    def _translate(self, hit):
        """Build a ``Hit`` from a raw Pyndex hit.

        The URI comes from ``hit.doc.docname`` and the score from
        ``hit.score``. ``current`` is bound to the framework's ``fetch``
        and ``indexed`` to the indexer's ``fetch``.
        """
        uri = URI(hit.doc.docname)
        score = hit.score
        fetch_current = self.indexer.framework.fetch
        fetch_indexed = self.indexer.fetch
        return Hit(
            uri=uri,
            score=score,
            current=fetch_current,
            indexed=fetch_indexed,
        )
|---|