Show
Ignore:
Timestamp:
30/04/06 14:09:58 (4 years ago)
Author:
athomas
Message:

pyndexter:

  • Added a horribly inefficient built-in indexer, default.DefaultIndexer. There seems to be a memory leak somewhere, so on large datasets the indexer will consume large amounts of memory.
  • Added a util module with CacheDict, currently used by the default indexer.
  • Added some more CAP_ bits.
  • Source state data is now accumulated by each Source class's __iter__() method, no longer requiring a full walk of the source to collect state. This means an Indexer.update() will automagically do the right thing.
  • Factored out some common environment initialisation code into Indexer._init_env().
  • Factored the FileSource include/exclude/predicate code into the base Source class so it can be reused.
Files:
1 modified

Legend:

Unmodified
Added
Removed
  • pyndexter/trunk/pyndexter/file.py

    r331 r332  
    11import sys 
    22import codecs 
    3 import os.path 
    4 from fnmatch import fnmatch 
    5 from dircache import listdir 
     3import os 
     4from stat import * 
    65from urlparse import urlsplit, urlunsplit 
    76 
     
    1110    def __init__(self, root, include=['*'], exclude=[], predicate=None): 
    1211        """ Expose a subset of the file system for searching. """ 
     12        Source.__init__(self, include, exclude, predicate) 
    1313        self.root = os.path.normpath(root) 
    14         self.include = include 
    15         self.exclude = exclude 
    16         self.predicate = predicate or self._glob_predicate 
    1714        self.encoding = sys.getfilesystemencoding() 
    1815 
     
    2118            path = path.strip(os.path.sep) 
    2219            root_path = os.path.join(self.root, path) 
    23             for file in listdir(root_path): 
     20            for file in os.listdir(root_path): 
    2421                full_path = os.path.join(root_path, file) 
    25                 if os.path.isdir(full_path): 
     22                try: 
     23                    stat = os.lstat(full_path) 
     24                except OSError: 
     25                    continue 
     26                if not self.predicate(full_path) or not os.access(full_path, os.R_OK): 
     27                    continue 
     28                if S_ISDIR(stat.st_mode): 
    2629                    for file in walk_path(os.path.join(path, file)): 
    2730                        yield file 
    28                 elif self.predicate(full_path) and os.path.exists(full_path): 
    29                     # TODO Stat for normal files + readability 
    30                     yield self._file2uri(full_path) 
     31                elif S_ISREG(stat.st_mode): 
     32                    yield (self._file2uri(full_path).decode(self.encoding), stat) 
    3133 
    32         for file in walk_path('/'): 
    33             yield file.decode(self.encoding) 
     34        for file, stat in walk_path('/'): 
     35            self._state[file] = stat.st_mtime 
     36            yield file 
    3437 
    3538    def matches(self, uri): 
    36         scheme, netloc, path, query, fragment = urlsplit(uri) 
     39        scheme, netloc, path, query, fragment = urlsplit(uri, 'file') 
    3740        path = os.path.normpath(path) 
    38         return scheme in ('file', '') and \ 
     41        return scheme == 'file' and \ 
    3942               path.startswith(self.root) and \ 
    4043               self.predicate(path) 
     
    6770 
    6871    def _uri2file(self, uri): 
    69         scheme, location, path, query, fragment = urlsplit(uri) 
    70         if scheme not in ('file', ''): 
     72        scheme, location, path, query, fragment = urlsplit(uri, 'file') 
     73        if scheme not in 'file': 
    7174            raise InvalidURI("URI scheme in '%s' not supported by FileSource" 
    7275                             % scheme) 
     
    7679                             % uri) 
    7780        return path.decode(self.encoding) 
    78  
    79     def _glob_predicate(self, file): 
    80         for pattern in self.exclude: 
    81             if fnmatch(file, pattern): 
    82                 return False 
    83         for pattern in self.include: 
    84             if fnmatch(file, pattern): 
    85                 return True 
    86         return False 
    87