Changeset 370

Show
Ignore:
Timestamp:
02/04/07 03:35:23 (2 years ago)
Author:
athomas
Message:

pyndexter: Added a max_term_length option to the Xapian adapter that restricts
the length of terms added to the index; longer terms are skipped. It defaults
to 240, the maximum supported by the flint backend.

Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • pyndexter/branches/refactoring/pyndexter/indexers/xapian.py

    r368 r370  
    2121 
    2222class XapianIndexer(Indexer): 
    23     def __init__(self, framework, path, stemmer='english', words=r'\w+'): 
     23    def __init__(self, framework, path, stemmer='english', words=r'\w+', 
     24                 max_term_length=240): 
    2425        Indexer.__init__(self, framework) 
    2526        self.stemmer = xapian.Stem(stemmer) 
    2627        self.words = re.compile(words) 
     28        self.max_term_length = max_term_length 
    2729 
    2830        path = path.encode('utf-8') 
     
    3436            if not os.path.exists(self.xapian_path): 
    3537                os.makedirs(self.xapian_path) 
    36             self.db = xapian.flint_open(self.xapian_path, xapian.DB_CREATE_OR_OPEN) 
     38            self.db = xapian.flint_open(self.xapian_path, 
     39                                        xapian.DB_CREATE_OR_OPEN) 
    3740        else: 
    3841            self.db = xapian.flint_open(self.xapian_path) 
     
    5154        for word in self.words.finditer(content): 
    5255            term = self.stemmer.stem_word(word.group().lower()) 
     56            if len(term) > self.max_term_length: 
     57               continue 
    5358            doc.add_posting(term, word.start()) 
    5459 
     
    7075 
    7176    def search(self, query): 
    72         query = query.encode('utf-8') 
    7377        query_parser = xapian.QueryParser() 
    7478        query_parser.set_stemmer(self.stemmer)