| 23 | | """Parse a URI into its component parts. The `query` component is passed |
|---|
| 24 | | through `cgi.parse_qs()`. |
|---|
| 25 | | |
|---|
| 26 | | scheme://username:password@host/path?query#fragment |
|---|
| 27 | | |
|---|
| 28 | | Each component is available as an attribute of the object. |
|---|
| 29 | | |
|---|
| 30 | | TODO: Support "parameters???" Never seen this in the wild: |
|---|
| 31 | | scheme://username:password@host/path;parameters?query#fragment |
|---|
| 32 | | |
|---|
| 33 | | PS. `urlparse` is not useful. |
|---|
| 34 | | |
|---|
| 35 | | The URI constructor can be passed a string: |
|---|
| 36 | | |
|---|
| 37 | | >>> u = URI('http://user:password@www.example.com:12345/some/path?parm=1&parm=2&other=3#fragment') |
|---|
| 38 | | >>> u |
|---|
| 39 | | URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') |
|---|
| 40 | | >>> u.scheme |
|---|
| 41 | | 'http' |
|---|
| 42 | | >>> u.username |
|---|
| 43 | | 'user' |
|---|
| 44 | | >>> u.password |
|---|
| 45 | | 'password' |
|---|
| 46 | | >>> u.host |
|---|
| 47 | | 'www.example.com' |
|---|
| 48 | | >>> u.path |
|---|
| 49 | | '/some/path' |
|---|
| 50 | | >>> u.query |
|---|
| 51 | | {'parm': ['1', '2'], 'other': ['3']} |
|---|
| 52 | | >>> u.fragment |
|---|
| 53 | | 'fragment' |
|---|
| 54 | | |
|---|
| 55 | | ...or the individual URI components as keyword arguments: |
|---|
| 56 | | |
|---|
| 57 | | >>> URI(scheme='http', username='user', password='password', host='www.example.com', port=12345, path='/some/path', query={'parm': [1, 2], 'other': [3]}, fragment='fragment') |
|---|
| 58 | | URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') |
|---|
| 59 | | |
|---|
| 60 | | ...or a combination of the two, in which case keyword arguments are used as |
|---|
| 61 | | defaults if not provided by the URI: |
|---|
| 62 | | |
|---|
| 63 | | >>> URI('http://localhost', port=80) |
|---|
| 64 | | URI(u'http://localhost:80') |
|---|
| 65 | | |
|---|
| 66 | | ...and finally, another URI object: |
|---|
| 67 | | |
|---|
| 68 | | >>> v = URI(u) |
|---|
| 69 | | >>> v == u |
|---|
| 70 | | True |
|---|
| 71 | | >>> v.query is u.query |
|---|
| 72 | | False |
|---|
| 73 | | >>> v |
|---|
| 74 | | URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') |
|---|
| 75 | | |
|---|
| 76 | | URI also normalises the path component: |
|---|
| 77 | | |
|---|
| 78 | | >>> URI('http://www.example.com//some/../foo/path/') |
|---|
| 79 | | URI(u'http://www.example.com/foo/path') |
|---|
| 80 | | |
|---|
| 81 | | Query parameters must be passed as a dictionary with list values. Values |
|---|
| 82 | | are encoded automatically: |
|---|
| 83 | | |
|---|
| 84 | | >>> URI('http://localhost', query={'q': ['#1', 'foo']}) |
|---|
| 85 | | URI(u'http://localhost?q=%231&q=foo') |
|---|
| 86 | | """ |
|---|
| 87 | | |
|---|
# Regular expression splitting a URI string into its eight components
# (scheme, username, password, host, port, path, query, fragment).  Every
# group is optional; components that are absent come back as None.
#
# The scheme and the userinfo (username/password) exclude '/', '?' and '#'.
# Those characters end the authority part of a URI, so an '@' or '://' that
# appears later in the path or query (e.g. "?email=a@b.com" or
# "?next=http://foo") is not mistaken for credentials or a scheme.
# Credentials containing these characters must be percent-encoded in the
# string being parsed.
_pattern = re.compile(r"""
    (?:(?P<scheme>[^:/?#]+)://)?
    (?:(?P<username>[^:@/?#]*)
       (?::(?P<password>[^@/?#]*))?@)?
    (?P<host>[^?/#:]*)
    (?::(?P<port>[\d]+))?
    (?P<path>/[^#?]*)?
    (?:\?(?P<query>[^#]*))?
    (?:\#(?P<fragment>.*))?
    """, re.VERBOSE)

# '_path' is the storage behind the `path` property; the remaining slots
# are plain attributes.
__slots__ = ['scheme', 'username', 'password', 'host', 'port', '_path',
             'query', 'fragment']
|---|
| 101 | | |
|---|
def __init__(self, uri=None, scheme=None, username=None, password=None,
             host=None, port=None, path=None, query=None, fragment=None):
    """Build a URI from a string, another URI object, or keyword parts.

    Args:
      uri: Optional URI string to parse, or a URI object to clone.
      scheme, username, password, host, port, path, fragment: Component
        values.  When `uri` is given they act as defaults for any
        component that `uri` leaves empty.
      query: Dictionary mapping parameter names to lists of values.  A
        shallow copy is stored, so later changes to the caller's
        dictionary do not affect this object.

    Raises:
      ValueError: if the class pattern does not match `uri`.

    Note that a port parsed from a string is kept as a string of digits,
    while a port passed as a keyword is stored as given.
    """
    # TODO(aat) Make the logic in this constructor more efficient.
    self._path = ''
    # Own the query mapping instead of aliasing the caller's dict; this
    # mirrors the copy made below when cloning another URI object.
    if query:
        query = dict(query)
    else:
        query = {}

    if isinstance(uri, URI):
        # Copy attributes of a URI object.
        from copy import copy
        parts = (uri.scheme, uri.username, uri.password, uri.host,
                 uri.port, uri.path, copy(uri.query), uri.fragment)
    elif uri is not None:
        # Parse URI string.
        from cgi import parse_qs

        match = self._pattern.match(uri)
        if match is None:
            raise ValueError('Invalid URI')
        # Missing groups are None; normalise them to '' before decoding.
        raw = [g or '' for g in match.groups()]
        # Groups 0-5 and 7 are percent-decoded; group 6 is the query
        # string, which parse_qs decodes itself.
        parts = ([urllib.unquote(g) for g in raw[:6]] +
                 [parse_qs(raw[6])] +
                 [urllib.unquote(g) for g in raw[7:]])
    else:
        # Explicitly provided URI components.
        parts = (scheme, username, password, host, port, path, query,
                 fragment)

    # Assigning self.path goes through the `path` property setter.
    (self.scheme, self.username, self.password, self.host, self.port,
     self.path, self.query, self.fragment) = parts

    # Fill anything still empty from the keyword arguments, falling back
    # to an empty value of the right type.
    if not self.scheme:
        self.scheme = scheme or ''
    if not self.username:
        self.username = username or ''
    if not self.password:
        self.password = password or ''
    if not self.host:
        self.host = host or ''
    if not self.port:
        self.port = port or ''
    if not self.path:
        self.path = path or ''
    if not self.query:
        self.query = query or {}
    if not self.fragment:
        self.fragment = fragment or ''
|---|
| 142 | | |
|---|
def _set_path(self, path):
    """Store a normalised, absolute form of `path` in `self._path`.

    A false value (None or '') clears the path.  Anything else is
    anchored at '/' and normalised with posixpath.normpath, so repeated
    slashes, '.' and '..' segments and a trailing slash are removed.
    """
    if path:
        # Anchor at the root *before* normalising so relative input such
        # as 'foo/..' or '../x' cannot collapse to '.' or keep a leading
        # '..'.  Leading slashes are stripped first because normpath
        # preserves exactly two of them.
        self._path = posixpath.normpath('/' + path.lstrip('/'))
    else:
        self._path = ''

def _get_path(self):
    """Return the stored (already normalised) path."""
    return self._path

# Public attribute: reads return the stored path, writes normalise it.
path = property(_get_path, _set_path)
|---|
| 154 | | |
|---|
| 155 | | def __cmp__(self, other): |
|---|
| 156 | | """Compare two URI objects. |
|---|
| 157 | | |
|---|
| 158 | | >>> u = URI('http://user:password@www.example.com/some/path?parm=1&parm=2&other=3#fragment') |
|---|
| | 25 | """Parse a URI into its component parts. The `query` component is passed |
|---|
| | 26 | through `cgi.parse_qs()`. |
|---|
| | 27 | |
|---|
| | 28 | scheme://username:password@host/path?query#fragment |
|---|
| | 29 | |
|---|
| | 30 | Each component is available as an attribute of the object. |
|---|
| | 31 | |
|---|
| | 32 | TODO: Support "parameters???" Never seen this in the wild: |
|---|
| | 33 | scheme://username:password@host/path;parameters?query#fragment |
|---|
| | 34 | |
|---|
| | 35 | PS. `urlparse` is not useful. |
|---|
| | 36 | |
|---|
| | 37 | The URI constructor can be passed a string: |
|---|
| | 38 | |
|---|
| | 39 | >>> u = URI('http://user:password@www.example.com:12345/some/path?parm=1&parm=2&other=3#fragment') |
|---|
| | 40 | >>> u |
|---|
| | 41 | URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') |
|---|
| | 42 | >>> u.scheme |
|---|
| | 43 | 'http' |
|---|
| | 44 | >>> u.username |
|---|
| | 45 | 'user' |
|---|
| | 46 | >>> u.password |
|---|
| | 47 | 'password' |
|---|
| | 48 | >>> u.host |
|---|
| | 49 | 'www.example.com' |
|---|
| | 50 | >>> u.path |
|---|
| | 51 | '/some/path' |
|---|
| | 52 | >>> u.query |
|---|
| | 53 | {'parm': ['1', '2'], 'other': ['3']} |
|---|
| | 54 | >>> u.fragment |
|---|
| | 55 | 'fragment' |
|---|
| | 56 | |
|---|
| | 57 | ...or the individual URI components as keyword arguments: |
|---|
| | 58 | |
|---|
| | 59 | >>> URI(scheme='http', username='user', password='password', host='www.example.com', port=12345, path='/some/path', query={'parm': [1, 2], 'other': [3]}, fragment='fragment') |
|---|
| | 60 | URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') |
|---|
| | 61 | |
|---|
| | 62 | ...or a combination of the two, in which case keyword arguments are used as |
|---|
| | 63 | defaults if not provided by the URI: |
|---|
| | 64 | |
|---|
| | 65 | >>> URI('http://localhost', port=80) |
|---|
| | 66 | URI(u'http://localhost:80') |
|---|
| | 67 | |
|---|
| | 68 | ...and finally, another URI object: |
|---|
| | 69 | |
|---|
| 165 | | """ |
|---|
| 166 | | return cmp(repr(self), repr(other)) |
|---|
| 167 | | |
|---|
def __repr__(self):
    """Return ``URI(u'...')`` wrapped around the serialised URI text."""
    serialised = unicode(self)
    return "URI(u'%s')" % serialised
|---|
| 170 | | |
|---|
def __str__(self):
    """Return the serialised URI encoded as UTF-8."""
    text = unicode(self)
    return text.encode('utf-8')
|---|
| 173 | | |
|---|
def __unicode__(self):
    """Serialise the URI to unicode text, percent-encoding each part.

    Components that are empty are omitted together with their
    delimiter.  Query parameters are written in sorted key order, one
    ``key=value`` pair per list element.
    """
    pieces = []
    if self.scheme:
        pieces.append(urllib.quote(self.scheme) + u'://')
    if self.username or self.password:
        # safe='' so that '/' inside credentials is escaped too; left
        # raw it would read as the start of the path.
        if self.username:
            pieces.append(urllib.quote(self.username, safe=''))
        if self.password:
            pieces.append(u':' + urllib.quote(self.password, safe=''))
        pieces.append(u'@')
    pieces.append(urllib.quote(self.host))
    if self.port:
        pieces.append(u':%s' % self.port)
    pieces.append(urllib.quote(self.path))
    if self.query:
        pairs = []
        for key, values in sorted(self.query.items()):
            for value in values:
                pairs.append(u'%s=%s' % (urllib.quote(key),
                                         urllib.quote(str(value))))
        # Keys with an empty value list contribute nothing, so no stray
        # '&' or bare '?' is emitted for them.
        if pairs:
            pieces.append(u'?' + u'&'.join(pairs))
    if self.fragment:
        pieces.append(u'#' + urllib.quote(self.fragment))
    # Joining on a unicode separator yields unicode even when every
    # piece is a byte string.
    return u''.join(pieces)
|---|
| | 75 | >>> v |
|---|
| | 76 | URI(u'http://user:password@www.example.com:12345/some/path?other=3&parm=1&parm=2#fragment') |
|---|
| | 77 | |
|---|
| | 78 | URI also normalises the path component: |
|---|
| | 79 | |
|---|
| | 80 | >>> URI('http://www.example.com//some/../foo/path/') |
|---|
| | 81 | URI(u'http://www.example.com/foo/path') |
|---|
| | 82 | |
|---|
| | 83 | Query parameters must be passed as a dictionary with list values. Values |
|---|
| | 84 | are encoded automatically: |
|---|
| | 85 | |
|---|
| | 86 | >>> URI('http://localhost', query={'q': ['#1', 'foo']}) |
|---|
| | 87 | URI(u'http://localhost?q=%231&q=foo') |
|---|
| | 88 | """ |
|---|
| | 89 | |
|---|
# Regular expression splitting a URI string into its eight components
# (scheme, username, password, host, port, path, query, fragment).  Every
# group is optional; components that are absent come back as None.
#
# The scheme and the userinfo (username/password) exclude '/', '?' and '#'.
# Those characters end the authority part of a URI, so an '@' or '://' that
# appears later in the path or query (e.g. "?email=a@b.com" or
# "?next=http://foo") is not mistaken for credentials or a scheme.
# Credentials containing these characters must be percent-encoded in the
# string being parsed.
_pattern = re.compile(r"""
    (?:(?P<scheme>[^:/?#]+)://)?
    (?:(?P<username>[^:@/?#]*)
       (?::(?P<password>[^@/?#]*))?@)?
    (?P<host>[^?/#:]*)
    (?::(?P<port>[\d]+))?
    (?P<path>/[^#?]*)?
    (?:\?(?P<query>[^#]*))?
    (?:\#(?P<fragment>.*))?
    """, re.VERBOSE)

# '_path' is the storage behind the `path` property; the remaining slots
# are plain attributes.
__slots__ = ['scheme', 'username', 'password', 'host', 'port', '_path',
             'query', 'fragment']
|---|
| | 103 | |
|---|
def __init__(self, uri=None, scheme=None, username=None, password=None,
             host=None, port=None, path=None, query=None, fragment=None):
    """Build a URI from a string, another URI object, or keyword parts.

    Args:
      uri: Optional URI string to parse, or a URI object to clone.
      scheme, username, password, host, port, path, fragment: Component
        values.  When `uri` is given they act as defaults for any
        component that `uri` leaves empty.
      query: Dictionary mapping parameter names to lists of values.  A
        shallow copy is stored, so later changes to the caller's
        dictionary do not affect this object.

    Raises:
      ValueError: if the class pattern does not match `uri`.

    Note that a port parsed from a string is kept as a string of digits,
    while a port passed as a keyword is stored as given.
    """
    # TODO(aat) Make the logic in this constructor more efficient.
    self._path = ''
    # Own the query mapping instead of aliasing the caller's dict; this
    # mirrors the copy made below when cloning another URI object.
    if query:
        query = dict(query)
    else:
        query = {}

    if isinstance(uri, URI):
        # Copy attributes of a URI object.
        from copy import copy
        parts = (uri.scheme, uri.username, uri.password, uri.host,
                 uri.port, uri.path, copy(uri.query), uri.fragment)
    elif uri is not None:
        # Parse URI string.
        from cgi import parse_qs

        match = self._pattern.match(uri)
        if match is None:
            raise ValueError('Invalid URI')
        # Missing groups are None; normalise them to '' before decoding.
        raw = [g or '' for g in match.groups()]
        # Groups 0-5 and 7 are percent-decoded; group 6 is the query
        # string, which parse_qs decodes itself.
        parts = ([urllib.unquote(g) for g in raw[:6]] +
                 [parse_qs(raw[6])] +
                 [urllib.unquote(g) for g in raw[7:]])
    else:
        # Explicitly provided URI components.
        parts = (scheme, username, password, host, port, path, query,
                 fragment)

    # Assigning self.path goes through the `path` property setter.
    (self.scheme, self.username, self.password, self.host, self.port,
     self.path, self.query, self.fragment) = parts

    # Fill anything still empty from the keyword arguments, falling back
    # to an empty value of the right type.
    if not self.scheme:
        self.scheme = scheme or ''
    if not self.username:
        self.username = username or ''
    if not self.password:
        self.password = password or ''
    if not self.host:
        self.host = host or ''
    if not self.port:
        self.port = port or ''
    if not self.path:
        self.path = path or ''
    if not self.query:
        self.query = query or {}
    if not self.fragment:
        self.fragment = fragment or ''
|---|
| | 144 | |
|---|
def _set_path(self, path):
    """Store a normalised, absolute form of `path` in `self._path`.

    A false value (None or '') clears the path.  Anything else is
    anchored at '/' and normalised with posixpath.normpath, so repeated
    slashes, '.' and '..' segments and a trailing slash are removed.
    """
    if path:
        # Anchor at the root *before* normalising so relative input such
        # as 'foo/..' or '../x' cannot collapse to '.' or keep a leading
        # '..'.  Leading slashes are stripped first because normpath
        # preserves exactly two of them.
        self._path = posixpath.normpath('/' + path.lstrip('/'))
    else:
        self._path = ''

def _get_path(self):
    """Return the stored (already normalised) path."""
    return self._path

# Public attribute: reads return the stored path, writes normalise it.
path = property(_get_path, _set_path)
|---|
| | 156 | |
|---|
def __cmp__(self, other):
    """Compare two URI objects by the text of their `repr()`.

    >>> u = URI('http://user:password@www.example.com/some/path?parm=1&parm=2&other=3#fragment')
    >>> v = URI(u)
    >>> u == v
    True
    >>> v.host = 'www.example.org'
    >>> u == v
    False
    """
    # The example above must assign a *different* host: re-assigning
    # 'www.example.com' would leave the two objects equal.
    return cmp(repr(self), repr(other))
|---|
| | 169 | |
|---|
def __repr__(self):
    """Return ``URI(u'...')`` wrapped around the serialised URI text."""
    serialised = unicode(self)
    return "URI(u'%s')" % serialised
|---|
| | 172 | |
|---|
def __str__(self):
    """Return the serialised URI encoded as UTF-8."""
    text = unicode(self)
    return text.encode('utf-8')
|---|
| | 175 | |
|---|
def __unicode__(self):
    """Serialise the URI to unicode text, percent-encoding each part.

    Components that are empty are omitted together with their
    delimiter.  Query parameters are written in sorted key order, one
    ``key=value`` pair per list element.
    """
    pieces = []
    if self.scheme:
        pieces.append(urllib.quote(self.scheme) + u'://')
    if self.username or self.password:
        # safe='' so that '/' inside credentials is escaped too; left
        # raw it would read as the start of the path.
        if self.username:
            pieces.append(urllib.quote(self.username, safe=''))
        if self.password:
            pieces.append(u':' + urllib.quote(self.password, safe=''))
        pieces.append(u'@')
    pieces.append(urllib.quote(self.host))
    if self.port:
        pieces.append(u':%s' % self.port)
    pieces.append(urllib.quote(self.path))
    if self.query:
        pairs = []
        for key, values in sorted(self.query.items()):
            for value in values:
                pairs.append(u'%s=%s' % (urllib.quote(key),
                                         urllib.quote(str(value))))
        # Keys with an empty value list contribute nothing, so no stray
        # '&' or bare '?' is emitted for them.
        if pairs:
            pieces.append(u'?' + u'&'.join(pairs))
    if self.fragment:
        pieces.append(u'#' + urllib.quote(self.fragment))
    # Joining on a unicode separator yields unicode even when every
    # piece is a byte string.
    return u''.join(pieces)
|---|
| 223 | | """Object hook for simplejson.JSONDecoder to handle custom types.""" |
|---|
| 224 | | |
|---|
| 225 | | ip_re = (r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2' |
|---|
| 226 | | r'[0-4][0-9]|[01]?[0-9][0-9]?)\b') |
|---|
| 227 | | |
|---|
| 228 | | if isinstance(obj, (unicode, str)): |
|---|
| 229 | | try: |
|---|
| 230 | | obj = FromISOTime(obj) |
|---|
| 231 | | except ValueError: |
|---|
| 232 | | pass |
|---|
| | 224 | """Object hook for simplejson.JSONDecoder to handle custom types.""" |
|---|
| | 225 | |
|---|
| | 226 | ip_re = (r'\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2' |
|---|
| | 227 | r'[0-4][0-9]|[01]?[0-9][0-9]?)\b') |
|---|
| | 228 | |
|---|
| | 229 | if isinstance(obj, (unicode, str)): |
|---|
| | 230 | try: |
|---|
| | 231 | obj = FromISOTime(obj) |
|---|
| | 232 | except ValueError: |
|---|
| | 233 | pass |
|---|
| | 234 | |
|---|
| | 235 | elif isinstance(obj, list): |
|---|
| | 236 | obj = map(custom_json_decode_hook, obj) |
|---|
| | 237 | elif isinstance(obj, dict): |
|---|
| | 238 | new_dict = {} |
|---|
| | 239 | for key in obj: |
|---|
| | 240 | new_dict[custom_json_decode_hook(key)] = \ |
|---|
| | 241 | custom_json_decode_hook(obj[key]) |
|---|
| | 242 | obj = new_dict |
|---|
| | 243 | |
|---|
| | 244 | return obj |
|---|
| | 245 | |
|---|
| | 246 | |
|---|
def json_encode(data):
    """Serialise `data` to a JSON string.

    Args:
      data: Data to encode.

    Returns:
      JSON encoded string, as produced by simplejson.dumps() using
      CustomJSONEncoder.
    """
    encoded = simplejson.dumps(data, cls=CustomJSONEncoder)
    return encoded
|---|
| | 257 | |
|---|
| | 258 | |
|---|
def json_decode(data):
    """Parse JSON text back into Python objects.

    Args:
      data: Data to decode.

    Returns:
      A decoded object; simplejson.loads() is called with
      custom_json_decode_hook as its object hook.
    """
    decoded = simplejson.loads(data, object_hook=custom_json_decode_hook)
    return decoded
|---|
| | 269 | |
|---|
| | 270 | |
|---|
def to_iso_time(dtime):
    """Format a datetime object as an ISO8601 datetime string.

    Args:
      dtime: datetime.datetime object

    Returns:
      ISO8601 formatted datetime string
    """
    # time.mktime() reads the time tuple as local time and returns seconds
    # since the epoch, which is then handed to iso8601.tostring().
    timestamp = time.mktime(dtime.timetuple())
    return iso8601.tostring(timestamp)
|---|
| | 281 | |
|---|
| | 282 | |
|---|
| | 283 | def from_iso_time(iso): |
|---|
| | 284 | """Convert an ISO8601 formatted string into a datetime object. |
|---|
| | 285 | |
|---|
| | 286 | Args: |
|---|
| | 287 | iso: ISO8601 formatted string. |
|---|