Changeset 393
- Timestamp: 02/14/07 06:20:22 (2 years ago)
- Files:
- pyndexter/trunk/pyndexter/indexers/builtin.py (modified) (2 diffs)
- pyndexter/trunk/pyndexter/indexers/mock.py (modified) (1 diff)
- pyndexter/trunk/pyndexter/indexers/tests.py (modified) (4 diffs)
- pyndexter/trunk/pyndexter/__init__.py (modified) (1 diff)
- pyndexter/trunk/pyndexter/tests/corpus.py (modified) (1 diff)
- pyndexter/trunk/pyndexter/tests/framework.py (modified) (2 diffs)
- pyndexter/trunk/pyndexter/tests/__init__.py (modified) (1 diff)
- pyndexter/trunk/pyndexter/tests/unicode_corpus.py (added)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/trunk/pyndexter/indexers/builtin.py
r387 r393 235 235 def search(self, query): 236 236 query.reduce(self.framework.reduce) 237 # FIXME currently simply finding the intersection of all documents 238 # (AND) 237 # FIXME NOT is not supported yet 239 238 # FIXME Words without a WID can be automatically excluded from the 240 239 # search 241 words = self._wids(query.terms()) 242 uris = None 243 for word in words: 244 if uris is None: 245 uris = self.words.get(word) 246 else: 247 uris.intersection_update(self.words.get(word)) 248 240 241 # class invertedset(object): 242 # def __init__(self, items=[]): 243 # self.items = items 244 # 245 # def intersection(self, other): 246 # self. 247 248 def visit(node): 249 if node.type == node.TERM: 250 return self.words.get(self._wid(node.value)) 251 elif node.type == node.AND: 252 return visit(node.left).intersection(visit(node.right)) 253 elif node.type == node.OR: 254 return visit(node.left).union(visit(node.right)) 255 elif node.type == node.NOT: 256 raise NotImplementedError('NOT is ... not supported') 257 #return invertedset(visit(node.left)) 258 259 uris = visit(query) 249 260 return BuiltinResult(self, query, list(self._words(uris))) 250 261 … … 271 282 id = self.config['wordid'] 272 283 self.config['wordid'] = id + 1 284 id = unicode(id) 273 285 self.wordid[word] = id 274 286 self.idword[id] = word pyndexter/trunk/pyndexter/indexers/mock.py
r391 r393 78 78 def __iter__(self): 79 79 for uri in self.context: 80 if self.query(self.indexer. cache[uri].lower()):80 if self.query(self.indexer.framework.reduce(self.indexer.cache[uri].lower())): 81 81 yield self._translate(uri) 82 82 pyndexter/trunk/pyndexter/indexers/tests.py
r392 r393 7 7 8 8 class IndexerTestCase(unittest.TestCase): 9 10 9 def setUp(self): 11 10 self.path = tempfile.mkdtemp() 12 11 13 indexer = self.__class__.__name__[:-8].lower() 14 self.framework = Framework('%s://%s' % (indexer, os.path.join(self.path, indexer))) 12 try: 13 indexer = self.indexer % os.path.join(self.path, 'indexer.db') 14 except AttributeError: 15 indexer = '%s://%s' % (self.__class__.__name__[:-8].lower(), os.path.join(self.path, 'indexer.db')) 16 17 self.framework = Framework(indexer) 15 18 self.framework.add_source('mock://') 16 19 … … 41 44 self.assertEquals(filtered, [(2, URI(u'mock://12'))]) 42 45 43 def test_search(self): 46 def test_fetch_via_source(self): 47 uri = u'mock://1' 48 doc = self.framework.fetch(uri) 49 self.assertEquals(doc.quality, 1.0) 50 self.assertEquals(doc.content, documents[uri].content) 51 self.assertRaises(DocumentNotFound, self.framework.fetch, u'file://foo') 52 53 def test_indexer_iteration(self): 44 54 self.framework.update() 45 uris = [hit.uri for hit in self.framework.search(and_query)] 46 uris.sort() 47 self.assertEquals(uris, and_hits) 55 self.assertEquals(mock_uri_list, sorted([uri for uri in self.framework.indexer])) 56 57 def test_source_iteration(self): 58 self.framework.update() 59 self.assertEquals(mock_uri_list, sorted([uri for uri in self.framework.source])) 60 61 # Search tests 62 def test_search_string_simple(self): 63 self.framework.update() 64 self.assertEquals(simple_hits, sorted([hit.uri for hit in self.framework.search(simple_query)])) 65 66 def test_search_Query_simple(self): 67 self.framework.update() 68 query = Query(simple_query) 69 self.assertEquals(simple_hits, sorted([hit.uri for hit in self.framework.search(query)])) 70 71 def test_search_string_and(self): 72 self.framework.update() 73 self.assertEquals(and_hits, sorted([hit.uri for hit in self.framework.search(and_query)])) 74 75 def test_search_string_not(self): 76 self.framework.update() 77 self.assertEquals(not_hits, sorted([hit.uri for 
hit in self.framework.search(not_query)])) 78 79 80 class BuiltinTestCase(IndexerTestCase): 81 pass 82 83 84 class BuiltinCachingTestCase(IndexerTestCase): 85 indexer = 'builtin://%s?cache=true' 86 87 88 class BuiltinCompactTestCase(IndexerTestCase): 89 indexer = 'builtin://%s?compact=true' 90 91 92 class BuiltinCompactCachingTestCase(IndexerTestCase): 93 indexer = 'builtin://%s?cache=true&compact=true' 48 94 49 95 … … 57 103 58 104 class HyperestraierTestCase(IndexerTestCase): 59 pass60 61 62 class BuiltinTestCase(IndexerTestCase):63 105 pass 64 106 … … 78 120 def suite(): 79 121 suite = unittest.TestSuite() 80 suite.addTest(unittest.makeSuite(BuiltinTestCase, 'test')) 81 suite.addTest(unittest.makeSuite(XapianTestCase, 'test')) 82 suite.addTest(unittest.makeSuite(HypeTestCase, 'test')) 83 suite.addTest(unittest.makeSuite(HyperestraierTestCase, 'test')) 84 suite.addTest(unittest.makeSuite(PyndexTestCase, 'test')) 85 suite.addTest(unittest.makeSuite(LuceneTestCase, 'test')) 86 suite.addTest(unittest.makeSuite(LupyTestCase, 'test')) 122 suite.addTest(unittest.makeSuite(BuiltinTestCase)) 123 suite.addTest(unittest.makeSuite(BuiltinCachingTestCase)) 124 suite.addTest(unittest.makeSuite(BuiltinCompactTestCase)) 125 suite.addTest(unittest.makeSuite(BuiltinCompactCachingTestCase)) 126 suite.addTest(unittest.makeSuite(XapianTestCase)) 127 suite.addTest(unittest.makeSuite(HypeTestCase)) 128 suite.addTest(unittest.makeSuite(HyperestraierTestCase)) 129 suite.addTest(unittest.makeSuite(PyndexTestCase)) 130 suite.addTest(unittest.makeSuite(LuceneTestCase)) 131 suite.addTest(unittest.makeSuite(LupyTestCase)) 87 132 return suite 88 133 pyndexter/trunk/pyndexter/__init__.py
r392 r393 42 42 from urlparse import urlsplit, urlunsplit 43 43 from pyndexter.util import set, URI 44 45 46 __version__ = '0.2' 47 __author__ = 'Alec Thomas <alec@swapoff.org>' 44 48 45 49 pyndexter/trunk/pyndexter/tests/corpus.py
r392 r393 223 223 and_query = u'lorem ipsum' 224 224 225 not_hits = map(URI, [u'mock://16', u'mock://3', u'mock://4'])225 not_hits = [URI(u'mock://16'), URI(u'mock://3'), URI(u'mock://4')] 226 226 not_query = u'lorem ipsum -placerat' 227 pyndexter/trunk/pyndexter/tests/framework.py
r392 r393 10 10 from pyndexter import * 11 11 from pyndexter.sources.mock import MockSource 12 from pyndexter.indexers.mock import MockIndexer , MockResult13 from pyndexter. tests.corpus import *12 from pyndexter.indexers.mock import MockIndexer 13 from pyndexter.indexers.tests import IndexerTestCase 14 14 15 15 16 class FrameworkTestCase( unittest.TestCase):16 class FrameworkTestCase(IndexerTestCase): 17 17 def setUp(self): 18 18 self.framework = Framework('mock://') 19 19 self.framework.add_source('mock://') 20 self.framework.update()21 20 22 21 def tearDown(self): … … 27 26 self.assertTrue(isinstance(self.framework.source.sources[0], MockSource)) 28 27 29 def test_fetch(self):30 uri = u'mock://1'31 doc = self.framework.fetch(uri)32 self.assertEquals(doc.quality, 1.0)33 self.assertEquals(doc.content, documents[uri].content)34 self.assertRaises(DocumentNotFound, self.framework.fetch, u'file://foo')35 36 def test_indexer_iteration(self):37 self.assertEquals(mock_uri_list, [uri for uri in self.framework.indexer])38 39 def test_source_iteration(self):40 self.assertEquals(mock_uri_list, [uri for uri in self.framework.source])41 42 def test_search_string_simple(self):43 self.assertEquals(simple_hits,44 [hit.uri for hit in45 self.framework.search(simple_query)])46 47 def test_search_Query_simple(self):48 query = Query(simple_query)49 self.assertEquals(simple_hits,50 [hit.uri for hit in self.framework.search(query)])51 52 def test_search_string_and(self):53 self.assertEquals(and_hits,54 [hit.uri for hit in55 self.framework.search(and_query)])56 57 def test_search_string_not(self):58 self.assertEquals(not_hits,59 [hit.uri for hit in60 self.framework.search(not_query)])61 62 28 63 29 def suite(): 64 30 suite = unittest.TestSuite() 65 suite.addTest(unittest.makeSuite(FrameworkTestCase , 'test'))31 suite.addTest(unittest.makeSuite(FrameworkTestCase)) 66 32 return suite 67 33 pyndexter/trunk/pyndexter/tests/__init__.py
r390 r393 1 1 import unittest 2 2 import doctest 3 3 4 4 5 def suite():
