Changeset 370
- Timestamp: 02/04/07 03:35:23 (2 years ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/branches/refactoring/pyndexter/indexers/xapian.py
r368 r370
  21   21
  22   22    class XapianIndexer(Indexer):
  23       -     def __init__(self, framework, path, stemmer='english', words=r'\w+'):
       23  +     def __init__(self, framework, path, stemmer='english', words=r'\w+',
       24  +                  max_term_length=240):
  24   25            Indexer.__init__(self, framework)
  25   26            self.stemmer = xapian.Stem(stemmer)
  26   27            self.words = re.compile(words)
       28  +         self.max_term_length = max_term_length
  27   29
  28   30            path = path.encode('utf-8')
   …    …
  34   36            if not os.path.exists(self.xapian_path):
  35   37                os.makedirs(self.xapian_path)
  36       -             self.db = xapian.flint_open(self.xapian_path, xapian.DB_CREATE_OR_OPEN)
       38  +             self.db = xapian.flint_open(self.xapian_path,
       39  +                                         xapian.DB_CREATE_OR_OPEN)
  37   40            else:
  38   41                self.db = xapian.flint_open(self.xapian_path)
   …    …
  51   54            for word in self.words.finditer(content):
  52   55                term = self.stemmer.stem_word(word.group().lower())
       56  +             if len(term) > self.max_term_length:
       57  +                 continue
  53   58                doc.add_posting(term, word.start())
  54   59
   …    …
  70   75
  71   76        def search(self, query):
  72       -         query = query.encode('utf-8')
  73   77            query_parser = xapian.QueryParser()
  74   78            query_parser.set_stemmer(self.stemmer)
