Changeset 384
- Timestamp: 02/11/07 06:40:56 (2 years ago)
- Files:
- pyndexter/trunk/pyndexter/__init__.py (modified) (3 diffs)
- pyndexter/trunk/pyndexter/sources/file.py (modified) (1 diff)
- pyndexter/trunk/pyndexter/util.py (modified) (4 diffs)
- pyndexter/trunk/.todo (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
pyndexter/trunk/pyndexter/__init__.py
r383 r384 820 820 """ Update the index with the current state of the document source. 821 821 822 `filter` is a callable in the form `( context, stream)`, where `stream`823 is an iterable of `(transition, uri)` pairs."""822 `filter` is a callable in the form `(framework, context, stream)`, 823 where `stream` is an iterable of `(transition, uri)` pairs.""" 824 824 self._assert_rw() 825 825 if not self.state_store: … … 834 834 if self.state_store.exists(): 835 835 store = self.state_store.retrieve() 836 for transition, uri in filter( context,836 for transition, uri in filter(self, context, 837 837 self.source.difference(store)): 838 838 if transition == REMOVED: … … 847 847 yield ADDED, uri 848 848 849 for transition, uri in filter( context, fake_difference()):849 for transition, uri in filter(self, context, fake_difference()): 850 850 self.index(uri) 851 851 pyndexter/trunk/pyndexter/sources/file.py
r379 r384 28 28 29 29 class FileSource(Source): 30 """ Expose a subset of the file system for searching. """30 """ Expose a subset of the file system for searching.""" 31 31 32 32 def __init__(self, framework, path, include=None, exclude=None, predicate=None): pyndexter/trunk/pyndexter/util.py
r379 r384 9 9 import re 10 10 import posixpath 11 import sys 11 12 from StringIO import StringIO 12 13 from urllib import quote, unquote 14 from datetime import datetime, timedelta 13 15 try: 14 16 set = set … … 22 24 set frozenset 23 25 quote unquote 24 URI 26 URI TimingFilter 25 27 """.split() 26 28 … … 36 38 scheme://username:password@host/path;parameters?query#fragment 37 39 38 PS. `urlparse` is not useful. """ 40 PS. `urlparse` is not useful. 41 42 >>> u = URI('http://user:password@www.example.com/some/path?parm=1&parm=2&other=3#fragment') 43 >>> u 44 http://user:password@www.example.com/some/path?other=3&parm=1&parm=2#fragment 45 >>> u.scheme 46 'http' 47 >>> u.username 48 'user' 49 >>> u.password 50 'password' 51 >>> u.host 52 'www.example.com' 53 >>> u.path 54 '/some/path' 55 >>> u.query 56 {'parm': ['1', '2'], 'other': ['3']} 57 >>> u.fragment 58 'fragment' 59 60 URI also normalises the path component: 61 62 >>> URI('http://www.example.com//some/../foo/path/') 63 http://www.example.com/foo/path 64 """ 39 65 40 66 _pattern = re.compile(r'(?:(?P<scheme>[^:]+)://)?(?:(?P<username>[^:@]*)(?::(?P<password>[^@]*))?@)?(?P<host>[^?/#:]*)(?::(P<port>[\d+]+))?(?P<path>/[^#?]*)?(?:\?(?P<query>[^#]*))?(?:#(?P<fragment>.*))?') … … 104 130 uri += '#' + quote(self.fragment) 105 131 return uri 132 133 134 class TimingFilter(object): 135 """A Framework filter for collecting timing statistics.""" 136 def __init__(self, next_filter=None, progressive=False): 137 """`next_filter` is the next filter in the chain. 
138 139 `progressive` will print statistics while the indexer is running.""" 140 if next_filter: 141 self.next_filter = next_filter 142 self.times = [] 143 self.total = timedelta() 144 self.average = timedelta() 145 self.progressive = progressive 146 147 def next_filter(self, framework, context, stream): 148 for transition, uri in stream: 149 yield transition, uri 150 151 def __call__(self, framework, context, stream): 152 self.times = [] 153 for transition, uri in self.next_filter(framework, context, stream): 154 start = datetime.now() 155 yield transition, uri 156 end = datetime.now() 157 line = (transition, uri, start, end) 158 self.times.append(line) 159 if self.progressive: 160 self.print_line(*line) 161 162 self.total = timedelta() 163 self.average = timedelta() 164 for transition, uri, start, end in self.times: 165 self.total += end - start 166 if self.total: 167 self.average = self.total / len(self.times) 168 if self.progressive: 169 self.print_summary() 170 171 def print_line(self, transition, uri, start, end, out=sys.stdout): 172 from pyndexter import MODIFIED, ADDED, REMOVED 173 mapping = {MODIFIED: 'MODIFIED', ADDED: 'ADDED', REMOVED: 'REMOVED'} 174 print >>out, '%s %s (in %s)' % (mapping[transition], uri, end - start) 175 176 def print_summary(self, out=sys.stdout): 177 print >>out 178 print >>out, "Indexed %i documents" % len(self.times) 179 print >>out, 'Total time to index: %s' % self.total 180 print >>out, 'Average time to index: %s' % self.average 181 182 def __str__(self): 183 from StringIO import StringIO 184 out = StringIO() 185 for transition, uri, start, end in self.times: 186 self.print_line(transition, uri, start, end, out=out) 187 self.print_summary(out) 188 return out.getvalue() pyndexter/trunk/.todo
r382 r384 123 123 Fix port parsing in util.URI. 124 124 </note> 125 <note priority="medium" time="1171055477"> 126 Write a decent test suite. 127 </note> 125 128 </todo>
