Changeset 384

Show
Ignore:
Timestamp:
02/11/07 06:40:56 (2 years ago)
Author:
athomas
Message:

pyndexter:

  • Framework filters will now be passed the Framework object as their first
    argument, in addition to context and stream.
  • Added util.TimingFilter, a Framework filter useful for collecting
    timing statistics.
  • Added some doctests to URI.
Files:

Legend:

Unmodified
Added
Removed
Modified
Copied
Moved
  • pyndexter/trunk/pyndexter/__init__.py

    r383 r384  
    820820        """ Update the index with the current state of the document source. 
    821821 
    822         `filter` is a callable in the form `(context, stream)`, where `stream` 
    823         is an iterable of `(transition, uri)` pairs.""" 
     822        `filter` is a callable in the form `(framework, context, stream)`, 
     823        where `stream` is an iterable of `(transition, uri)` pairs.""" 
    824824        self._assert_rw() 
    825825        if not self.state_store: 
     
    834834        if self.state_store.exists(): 
    835835            store = self.state_store.retrieve() 
    836             for transition, uri in filter(context, 
     836            for transition, uri in filter(self, context, 
    837837                                          self.source.difference(store)): 
    838838                if transition == REMOVED: 
     
    847847                    yield ADDED, uri 
    848848 
    849             for transition, uri in filter(context, fake_difference()): 
     849            for transition, uri in filter(self, context, fake_difference()): 
    850850                self.index(uri) 
    851851 
  • pyndexter/trunk/pyndexter/sources/file.py

    r379 r384  
    2828 
    2929class FileSource(Source): 
    30     """ Expose a subset of the file system for searching. """ 
     30    """ Expose a subset of the file system for searching.""" 
    3131 
    3232    def __init__(self, framework, path, include=None, exclude=None, predicate=None): 
  • pyndexter/trunk/pyndexter/util.py

    r379 r384  
    99import re 
    1010import posixpath 
     11import sys 
    1112from StringIO import StringIO 
    1213from urllib import quote, unquote 
     14from datetime import datetime, timedelta 
    1315try: 
    1416    set = set 
     
    2224set frozenset 
    2325quote unquote 
    24 URI 
     26URI TimingFilter 
    2527""".split() 
    2628 
     
    3638        scheme://username:password@host/path;parameters?query#fragment 
    3739 
    38     PS. `urlparse` is not useful. """ 
     40    PS. `urlparse` is not useful. 
     41 
     42    >>> u = URI('http://user:password@www.example.com/some/path?parm=1&parm=2&other=3#fragment') 
     43    >>> u 
     44    http://user:password@www.example.com/some/path?other=3&parm=1&parm=2#fragment 
     45    >>> u.scheme 
     46    'http' 
     47    >>> u.username 
     48    'user' 
     49    >>> u.password 
     50    'password' 
     51    >>> u.host 
     52    'www.example.com' 
     53    >>> u.path 
     54    '/some/path' 
     55    >>> u.query 
     56    {'parm': ['1', '2'], 'other': ['3']} 
     57    >>> u.fragment 
     58    'fragment' 
     59 
     60    URI also normalises the path component: 
     61 
     62    >>> URI('http://www.example.com//some/../foo/path/') 
     63    http://www.example.com/foo/path 
     64    """ 
    3965 
    4066    _pattern = re.compile(r'(?:(?P<scheme>[^:]+)://)?(?:(?P<username>[^:@]*)(?::(?P<password>[^@]*))?@)?(?P<host>[^?/#:]*)(?::(P<port>[\d+]+))?(?P<path>/[^#?]*)?(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?') 
     
    104130            uri += '#' + quote(self.fragment) 
    105131        return uri 
     132 
     133 
     134class TimingFilter(object): 
     135    """A Framework filter for collecting timing statistics.""" 
     136    def __init__(self, next_filter=None, progressive=False): 
     137        """`next_filter` is the next filter in the chain. 
     138 
     139        `progressive` will print statistics while the indexer is running.""" 
     140        if next_filter: 
     141            self.next_filter = next_filter 
     142        self.times = [] 
     143        self.total = timedelta() 
     144        self.average = timedelta() 
     145        self.progressive = progressive 
     146 
     147    def next_filter(self, framework, context, stream): 
     148        for transition, uri in stream: 
     149            yield transition, uri 
     150 
     151    def __call__(self, framework, context, stream): 
     152        self.times = [] 
     153        for transition, uri in self.next_filter(framework, context, stream): 
     154            start = datetime.now() 
     155            yield transition, uri 
     156            end = datetime.now() 
     157            line = (transition, uri, start, end) 
     158            self.times.append(line) 
     159            if self.progressive: 
     160                self.print_line(*line) 
     161 
     162        self.total = timedelta() 
     163        self.average = timedelta() 
     164        for transition, uri, start, end in self.times: 
     165            self.total += end - start 
     166        if self.total: 
     167            self.average = self.total / len(self.times) 
     168        if self.progressive: 
     169            self.print_summary() 
     170 
     171    def print_line(self, transition, uri, start, end, out=sys.stdout): 
     172        from pyndexter import MODIFIED, ADDED, REMOVED 
     173        mapping = {MODIFIED: 'MODIFIED', ADDED: 'ADDED', REMOVED: 'REMOVED'} 
     174        print >>out, '%s %s (in %s)' % (mapping[transition], uri, end - start) 
     175 
     176    def print_summary(self, out=sys.stdout): 
     177        print >>out 
     178        print >>out, "Indexed %i documents" % len(self.times) 
     179        print >>out, 'Total time to index: %s' % self.total 
     180        print >>out, 'Average time to index: %s' % self.average 
     181 
     182    def __str__(self): 
     183        from StringIO import StringIO 
     184        out = StringIO() 
     185        for transition, uri, start, end in self.times: 
     186            self.print_line(transition, uri, start, end, out=out) 
     187        self.print_summary(out) 
     188        return out.getvalue() 
  • pyndexter/trunk/.todo

    r382 r384  
    123123        Fix port parsing in util.URI. 
    124124    </note> 
     125    <note priority="medium" time="1171055477"> 
     126        Write a decent test suite. 
     127    </note> 
    125128</todo>