Changeset 91167 in webkit
- Timestamp:
- Jul 17, 2011 1:57:09 AM (13 years ago)
- Location:
- trunk/Tools
- Files:
-
- 2 added
- 8 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/Tools/ChangeLog
r91166 r91167 1 2011-07-17 Philippe Normand <pnormand@igalia.com> 2 3 test-webkitpy failing with Python 2.5 4 https://bugs.webkit.org/show_bug.cgi?id=64594 5 6 Reviewed by Eric Seidel. 7 8 Updated simplejson to version 2.1.6 that fixes an issue when 9 dumping slashes. Also use cgi.parse_qs instead of the urlparse 10 version to be compatible with Python2.5. Verified the fix with 11 test-webkitpy on python 2.5 and python 2.6. 12 13 * Scripts/webkitpy/thirdparty/simplejson/README.txt: 14 * Scripts/webkitpy/thirdparty/simplejson/__init__.py: 15 * Scripts/webkitpy/thirdparty/simplejson/_speedups.c: 16 (json_PyOS_string_to_double): 17 (_convertPyInt_AsSsize_t): 18 (_convertPyInt_FromSsize_t): 19 (ascii_escape_char): 20 (ascii_escape_unicode): 21 (ascii_escape_str): 22 (raise_errmsg): 23 (join_list_unicode): 24 (join_list_string): 25 (_build_rval_index_tuple): 26 (scanstring_str): 27 (scanstring_unicode): 28 (py_scanstring): 29 (py_encode_basestring_ascii): 30 (scanner_dealloc): 31 (scanner_traverse): 32 (scanner_clear): 33 (_parse_object_str): 34 (_parse_object_unicode): 35 (_parse_array_str): 36 (_parse_array_unicode): 37 (_parse_constant): 38 (_match_number_str): 39 (_match_number_unicode): 40 (scan_once_str): 41 (scan_once_unicode): 42 (scanner_call): 43 (scanner_new): 44 (scanner_init): 45 (encoder_new): 46 (encoder_init): 47 (encoder_call): 48 (_encoded_const): 49 (encoder_encode_float): 50 (encoder_encode_string): 51 (_steal_list_append): 52 (encoder_listencode_obj): 53 (encoder_listencode_dict): 54 (encoder_listencode_list): 55 (encoder_dealloc): 56 (encoder_traverse): 57 (encoder_clear): 58 (init_speedups): 59 * Scripts/webkitpy/thirdparty/simplejson/decoder.py: 60 * Scripts/webkitpy/thirdparty/simplejson/encoder.py: 61 * Scripts/webkitpy/thirdparty/simplejson/ordered_dict.py: Added. 62 * Scripts/webkitpy/thirdparty/simplejson/scanner.py: 63 * Scripts/webkitpy/thirdparty/simplejson/tool.py: Added. 
64 * Scripts/webkitpy/tool/servers/reflectionhandler.py: 65 1 66 2011-07-17 Dimitri Glazkov <dglazkov@chromium.org> 2 67 -
trunk/Tools/Scripts/webkitpy/thirdparty/simplejson/README.txt
r54087 r91167 1 1 URL: http://undefined.org/python/#simplejson 2 Version: 1.7.3 2 Version: 2.1.6 3 3 License: MIT 4 4 License File: LICENSE.txt -
trunk/Tools/Scripts/webkitpy/thirdparty/simplejson/__init__.py
r54087 r91167 1 r""" 2 A simple, fast, extensible JSON encoder and decoder 3 4 JSON (JavaScript Object Notation) <http://json.org> is a subset of 1 r"""JSON (JavaScript Object Notation) <http://json.org> is a subset of 5 2 JavaScript syntax (ECMA-262 3rd edition) used as a lightweight data 6 3 interchange format. 7 4 8 simplejson exposes an API familiar to uses of the standard library 9 marshal and pickle modules. 5 :mod:`simplejson` exposes an API familiar to users of the standard library 6 :mod:`marshal` and :mod:`pickle` modules. It is the externally maintained 7 version of the :mod:`json` library contained in Python 2.6, but maintains 8 compatibility with Python 2.4 and Python 2.5 and (currently) has 9 significant performance advantages, even without using the optional C 10 extension for speedups. 10 11 11 12 Encoding basic Python object hierarchies:: 12 13 >>> import simplejson 14 >>> simplejson.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}])13 14 >>> import simplejson as json 15 >>> json.dumps(['foo', {'bar': ('baz', None, 1.0, 2)}]) 15 16 '["foo", {"bar": ["baz", null, 1.0, 2]}]' 16 >>> print simplejson.dumps("\"foo\bar")17 >>> print json.dumps("\"foo\bar") 17 18 "\"foo\bar" 18 >>> print simplejson.dumps(u'\u1234')19 >>> print json.dumps(u'\u1234') 19 20 "\u1234" 20 >>> print simplejson.dumps('\\')21 >>> print json.dumps('\\') 21 22 "\\" 22 >>> print simplejson.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True)23 >>> print json.dumps({"c": 0, "b": 0, "a": 0}, sort_keys=True) 23 24 {"a": 0, "b": 0, "c": 0} 24 25 >>> from StringIO import StringIO 25 26 >>> io = StringIO() 26 >>> simplejson.dump(['streaming API'], io)27 >>> json.dump(['streaming API'], io) 27 28 >>> io.getvalue() 28 29 '["streaming API"]' … … 30 31 Compact encoding:: 31 32 32 >>> import simplejson 33 >>> simplejson.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':'))33 >>> import simplejson as json 34 >>> json.dumps([1,2,3,{'4': 5, '6': 7}], separators=(',',':')) 34 35 '[1,2,3,{"4":5,"6":7}]' 35 
36 36 37 Pretty printing:: 37 38 38 >>> import simplejson 39 >>> print simplejson.dumps({'4': 5, '6': 7}, sort_keys=True, indent=4) 39 >>> import simplejson as json 40 >>> s = json.dumps({'4': 5, '6': 7}, sort_keys=True, indent=' ') 41 >>> print '\n'.join([l.rstrip() for l in s.splitlines()]) 40 42 { 41 "4": 5, 43 "4": 5, 42 44 "6": 7 43 45 } 44 46 45 47 Decoding JSON:: 46 47 >>> import simplejson 48 >>> simplejson.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') 49 [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] 50 >>> simplejson.loads('"\\"foo\\bar"') 51 u'"foo\x08ar' 48 49 >>> import simplejson as json 50 >>> obj = [u'foo', {u'bar': [u'baz', None, 1.0, 2]}] 51 >>> json.loads('["foo", {"bar":["baz", null, 1.0, 2]}]') == obj 52 True 53 >>> json.loads('"\\"foo\\bar"') == u'"foo\x08ar' 54 True 52 55 >>> from StringIO import StringIO 53 56 >>> io = StringIO('["streaming API"]') 54 >>> simplejson.load(io)55 [u'streaming API']57 >>> json.load(io)[0] == 'streaming API' 58 True 56 59 57 60 Specializing JSON object decoding:: 58 61 59 >>> import simplejson 62 >>> import simplejson as json 60 63 >>> def as_complex(dct): 61 64 ... if '__complex__' in dct: 62 65 ... return complex(dct['real'], dct['imag']) 63 66 ... return dct 64 ... 65 >>> simplejson.loads('{"__complex__": true, "real": 1, "imag": 2}',67 ... 68 >>> json.loads('{"__complex__": true, "real": 1, "imag": 2}', 66 69 ... object_hook=as_complex) 67 70 (1+2j) 68 69 Extending JSONEncoder:: 70 71 >>> import simplejson 72 >>> class ComplexEncoder(simplejson.JSONEncoder): 73 ... def default(self, obj): 74 ... if isinstance(obj, complex): 75 ... return [obj.real, obj.imag] 76 ... return simplejson.JSONEncoder.default(self, obj) 77 ... 78 >>> dumps(2 + 1j, cls=ComplexEncoder) 71 >>> from decimal import Decimal 72 >>> json.loads('1.1', parse_float=Decimal) == Decimal('1.1') 73 True 74 75 Specializing JSON object encoding:: 76 77 >>> import simplejson as json 78 >>> def encode_complex(obj): 79 ... 
if isinstance(obj, complex): 80 ... return [obj.real, obj.imag] 81 ... raise TypeError(repr(o) + " is not JSON serializable") 82 ... 83 >>> json.dumps(2 + 1j, default=encode_complex) 79 84 '[2.0, 1.0]' 80 >>> ComplexEncoder().encode(2 + 1j)85 >>> json.JSONEncoder(default=encode_complex).encode(2 + 1j) 81 86 '[2.0, 1.0]' 82 >>> list(ComplexEncoder().iterencode(2 + 1j)) 83 ['[', '2.0', ', ', '1.0', ']'] 84 85 86 Note that the JSON produced by this module's default settings 87 is a subset of YAML, so it may be used as a serializer for that as well. 87 >>> ''.join(json.JSONEncoder(default=encode_complex).iterencode(2 + 1j)) 88 '[2.0, 1.0]' 89 90 91 Using simplejson.tool from the shell to validate and pretty-print:: 92 93 $ echo '{"json":"obj"}' | python -m simplejson.tool 94 { 95 "json": "obj" 96 } 97 $ echo '{ 1.2:3.4}' | python -m simplejson.tool 98 Expecting property name: line 1 column 2 (char 2) 88 99 """ 89 __version__ = ' 1.7.3'100 __version__ = '2.1.6' 90 101 __all__ = [ 91 102 'dump', 'dumps', 'load', 'loads', 92 'JSONDecoder', 'JSONEncoder', 103 'JSONDecoder', 'JSONDecodeError', 'JSONEncoder', 104 'OrderedDict', 93 105 ] 94 106 95 from decoder import JSONDecoder 107 __author__ = 'Bob Ippolito <bob@redivi.com>' 108 109 from decimal import Decimal 110 111 from decoder import JSONDecoder, JSONDecodeError 96 112 from encoder import JSONEncoder 113 def _import_OrderedDict(): 114 import collections 115 try: 116 return collections.OrderedDict 117 except AttributeError: 118 import ordered_dict 119 return ordered_dict.OrderedDict 120 OrderedDict = _import_OrderedDict() 121 122 def _import_c_make_encoder(): 123 try: 124 from simplejson._speedups import make_encoder 125 return make_encoder 126 except ImportError: 127 return None 97 128 98 129 _default_encoder = JSONEncoder( … … 103 134 indent=None, 104 135 separators=None, 105 encoding='utf-8' 136 encoding='utf-8', 137 default=None, 138 use_decimal=False, 106 139 ) 107 140 108 141 def dump(obj, fp, skipkeys=False, 
ensure_ascii=True, check_circular=True, 109 142 allow_nan=True, cls=None, indent=None, separators=None, 110 encoding='utf-8', **kw): 111 """ 112 Serialize ``obj`` as a JSON formatted stream to ``fp`` (a 143 encoding='utf-8', default=None, use_decimal=False, **kw): 144 """Serialize ``obj`` as a JSON formatted stream to ``fp`` (a 113 145 ``.write()``-supporting file-like object). 114 146 115 If ``skipkeys`` is ``True``then ``dict`` keys that are not basic types116 (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) 147 If ``skipkeys`` is true then ``dict`` keys that are not basic types 148 (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) 117 149 will be skipped instead of raising a ``TypeError``. 118 150 119 If ``ensure_ascii`` is ``False``, then the some chunks written to ``fp``151 If ``ensure_ascii`` is false, then the some chunks written to ``fp`` 120 152 may be ``unicode`` instances, subject to normal Python ``str`` to 121 153 ``unicode`` coercion rules. Unless ``fp.write()`` explicitly … … 123 155 to cause an error. 124 156 125 If ``check_circular`` is ``False``, then the circular reference check157 If ``check_circular`` is false, then the circular reference check 126 158 for container types will be skipped and a circular reference will 127 159 result in an ``OverflowError`` (or worse). 128 160 129 If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to161 If ``allow_nan`` is false, then it will be a ``ValueError`` to 130 162 serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) 131 163 in strict compliance of the JSON specification, instead of using the 132 164 JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). 133 165 134 If ``indent`` is a non-negative integer, then JSON array elements and object 135 members will be pretty-printed with that indent level. An indent level 136 of 0 will only insert newlines. ``None`` is the most compact representation. 
166 If *indent* is a string, then JSON array elements and object members 167 will be pretty-printed with a newline followed by that string repeated 168 for each level of nesting. ``None`` (the default) selects the most compact 169 representation without any newlines. For backwards compatibility with 170 versions of simplejson earlier than 2.1.0, an integer is also accepted 171 and is converted to a string with that many spaces. 137 172 138 173 If ``separators`` is an ``(item_separator, dict_separator)`` tuple … … 142 177 ``encoding`` is the character encoding for str instances, default is UTF-8. 143 178 179 ``default(obj)`` is a function that should return a serializable version 180 of obj or raise TypeError. The default simply raises TypeError. 181 182 If *use_decimal* is true (default: ``False``) then decimal.Decimal 183 will be natively serialized to JSON with full precision. 184 144 185 To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the 145 186 ``.default()`` method to serialize additional types), specify it with 146 187 the ``cls`` kwarg. 
188 147 189 """ 148 190 # cached encoder 149 if ( skipkeys is False and ensure_ascii is Trueand150 check_circular is True and allow_nan is Trueand191 if (not skipkeys and ensure_ascii and 192 check_circular and allow_nan and 151 193 cls is None and indent is None and separators is None and 152 encoding == 'utf-8' and not kw): 194 encoding == 'utf-8' and default is None and not use_decimal 195 and not kw): 153 196 iterable = _default_encoder.iterencode(obj) 154 197 else: … … 157 200 iterable = cls(skipkeys=skipkeys, ensure_ascii=ensure_ascii, 158 201 check_circular=check_circular, allow_nan=allow_nan, indent=indent, 159 separators=separators, encoding=encoding, **kw).iterencode(obj) 202 separators=separators, encoding=encoding, 203 default=default, use_decimal=use_decimal, **kw).iterencode(obj) 160 204 # could accelerate with writelines in some versions of Python, at 161 205 # a debuggability cost … … 166 210 def dumps(obj, skipkeys=False, ensure_ascii=True, check_circular=True, 167 211 allow_nan=True, cls=None, indent=None, separators=None, 168 encoding='utf-8', **kw): 169 """ 170 Serialize ``obj`` to a JSON formatted ``str``. 171 172 If ``skipkeys`` is ``True`` then ``dict`` keys that are not basic types 173 (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) 212 encoding='utf-8', default=None, use_decimal=False, **kw): 213 """Serialize ``obj`` to a JSON formatted ``str``. 214 215 If ``skipkeys`` is false then ``dict`` keys that are not basic types 216 (``str``, ``unicode``, ``int``, ``long``, ``float``, ``bool``, ``None``) 174 217 will be skipped instead of raising a ``TypeError``. 175 218 176 If ``ensure_ascii`` is ``False``, then the return value will be a219 If ``ensure_ascii`` is false, then the return value will be a 177 220 ``unicode`` instance subject to normal Python ``str`` to ``unicode`` 178 221 coercion rules instead of being escaped to an ASCII ``str``. 
179 222 180 If ``check_circular`` is ``False``, then the circular reference check223 If ``check_circular`` is false, then the circular reference check 181 224 for container types will be skipped and a circular reference will 182 225 result in an ``OverflowError`` (or worse). 183 226 184 If ``allow_nan`` is ``False``, then it will be a ``ValueError`` to227 If ``allow_nan`` is false, then it will be a ``ValueError`` to 185 228 serialize out of range ``float`` values (``nan``, ``inf``, ``-inf``) in 186 229 strict compliance of the JSON specification, instead of using the 187 230 JavaScript equivalents (``NaN``, ``Infinity``, ``-Infinity``). 188 231 189 If ``indent`` is a non-negative integer, then JSON array elements and 190 object members will be pretty-printed with that indent level. An indent 191 level of 0 will only insert newlines. ``None`` is the most compact 192 representation. 232 If ``indent`` is a string, then JSON array elements and object members 233 will be pretty-printed with a newline followed by that string repeated 234 for each level of nesting. ``None`` (the default) selects the most compact 235 representation without any newlines. For backwards compatibility with 236 versions of simplejson earlier than 2.1.0, an integer is also accepted 237 and is converted to a string with that many spaces. 193 238 194 239 If ``separators`` is an ``(item_separator, dict_separator)`` tuple … … 198 243 ``encoding`` is the character encoding for str instances, default is UTF-8. 199 244 245 ``default(obj)`` is a function that should return a serializable version 246 of obj or raise TypeError. The default simply raises TypeError. 247 248 If *use_decimal* is true (default: ``False``) then decimal.Decimal 249 will be natively serialized to JSON with full precision. 250 200 251 To use a custom ``JSONEncoder`` subclass (e.g. one that overrides the 201 252 ``.default()`` method to serialize additional types), specify it with 202 253 the ``cls`` kwarg. 
254 203 255 """ 204 256 # cached encoder 205 if ( skipkeys is False and ensure_ascii is Trueand206 check_circular is True and allow_nan is Trueand257 if (not skipkeys and ensure_ascii and 258 check_circular and allow_nan and 207 259 cls is None and indent is None and separators is None and 208 encoding == 'utf-8' and not kw): 260 encoding == 'utf-8' and default is None and not use_decimal 261 and not kw): 209 262 return _default_encoder.encode(obj) 210 263 if cls is None: … … 213 266 skipkeys=skipkeys, ensure_ascii=ensure_ascii, 214 267 check_circular=check_circular, allow_nan=allow_nan, indent=indent, 215 separators=separators, encoding=encoding, 216 **kw).encode(obj) 217 218 _default_decoder = JSONDecoder(encoding=None, object_hook=None) 219 220 def load(fp, encoding=None, cls=None, object_hook=None, **kw): 221 """ 222 Deserialize ``fp`` (a ``.read()``-supporting file-like object containing 268 separators=separators, encoding=encoding, default=default, 269 use_decimal=use_decimal, **kw).encode(obj) 270 271 272 _default_decoder = JSONDecoder(encoding=None, object_hook=None, 273 object_pairs_hook=None) 274 275 276 def load(fp, encoding=None, cls=None, object_hook=None, parse_float=None, 277 parse_int=None, parse_constant=None, object_pairs_hook=None, 278 use_decimal=False, **kw): 279 """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing 223 280 a JSON document) to a Python object. 224 281 225 If the contents of ``fp`` is encoded with an ASCII based encoding other 226 than utf-8 (e.g. latin-1), then an appropriate ``encoding`` name must 227 be specified. Encodings that are not ASCII based (such as UCS-2) are 228 not allowed, and should be wrapped with 229 ``codecs.getreader(fp)(encoding)``, or simply decoded to a ``unicode`` 230 object and passed to ``loads()`` 231 232 ``object_hook`` is an optional function that will be called with the 233 result of any object literal decode (a ``dict``). 
The return value of 234 ``object_hook`` will be used instead of the ``dict``. This feature 235 can be used to implement custom decoders (e.g. JSON-RPC class hinting). 236 282 *encoding* determines the encoding used to interpret any 283 :class:`str` objects decoded by this instance (``'utf-8'`` by 284 default). It has no effect when decoding :class:`unicode` objects. 285 286 Note that currently only encodings that are a superset of ASCII work, 287 strings of other encodings should be passed in as :class:`unicode`. 288 289 *object_hook*, if specified, will be called with the result of every 290 JSON object decoded and its return value will be used in place of the 291 given :class:`dict`. This can be used to provide custom 292 deserializations (e.g. to support JSON-RPC class hinting). 293 294 *object_pairs_hook* is an optional function that will be called with 295 the result of any object literal decode with an ordered list of pairs. 296 The return value of *object_pairs_hook* will be used instead of the 297 :class:`dict`. This feature can be used to implement custom decoders 298 that rely on the order that the key and value pairs are decoded (for 299 example, :func:`collections.OrderedDict` will remember the order of 300 insertion). If *object_hook* is also defined, the *object_pairs_hook* 301 takes priority. 302 303 *parse_float*, if specified, will be called with the string of every 304 JSON float to be decoded. By default, this is equivalent to 305 ``float(num_str)``. This can be used to use another datatype or parser 306 for JSON floats (e.g. :class:`decimal.Decimal`). 307 308 *parse_int*, if specified, will be called with the string of every 309 JSON int to be decoded. By default, this is equivalent to 310 ``int(num_str)``. This can be used to use another datatype or parser 311 for JSON integers (e.g. :class:`float`). 312 313 *parse_constant*, if specified, will be called with one of the 314 following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. 
This 315 can be used to raise an exception if invalid JSON numbers are 316 encountered. 317 318 If *use_decimal* is true (default: ``False``) then it implies 319 parse_float=decimal.Decimal for parity with ``dump``. 320 237 321 To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` 238 322 kwarg. 323 239 324 """ 240 325 return loads(fp.read(), 241 encoding=encoding, cls=cls, object_hook=object_hook, **kw) 242 243 def loads(s, encoding=None, cls=None, object_hook=None, **kw): 244 """ 245 Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON 326 encoding=encoding, cls=cls, object_hook=object_hook, 327 parse_float=parse_float, parse_int=parse_int, 328 parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, 329 use_decimal=use_decimal, **kw) 330 331 332 def loads(s, encoding=None, cls=None, object_hook=None, parse_float=None, 333 parse_int=None, parse_constant=None, object_pairs_hook=None, 334 use_decimal=False, **kw): 335 """Deserialize ``s`` (a ``str`` or ``unicode`` instance containing a JSON 246 336 document) to a Python object. 247 337 248 If ``s`` is a ``str`` instance and is encoded with an ASCII based encoding 249 other than utf-8 (e.g. latin-1) then an appropriate ``encoding`` name 250 must be specified. Encodings that are not ASCII based (such as UCS-2) 251 are not allowed and should be decoded to ``unicode`` first. 252 253 ``object_hook`` is an optional function that will be called with the 254 result of any object literal decode (a ``dict``). The return value of 255 ``object_hook`` will be used instead of the ``dict``. This feature 256 can be used to implement custom decoders (e.g. JSON-RPC class hinting). 338 *encoding* determines the encoding used to interpret any 339 :class:`str` objects decoded by this instance (``'utf-8'`` by 340 default). It has no effect when decoding :class:`unicode` objects. 
341 342 Note that currently only encodings that are a superset of ASCII work, 343 strings of other encodings should be passed in as :class:`unicode`. 344 345 *object_hook*, if specified, will be called with the result of every 346 JSON object decoded and its return value will be used in place of the 347 given :class:`dict`. This can be used to provide custom 348 deserializations (e.g. to support JSON-RPC class hinting). 349 350 *object_pairs_hook* is an optional function that will be called with 351 the result of any object literal decode with an ordered list of pairs. 352 The return value of *object_pairs_hook* will be used instead of the 353 :class:`dict`. This feature can be used to implement custom decoders 354 that rely on the order that the key and value pairs are decoded (for 355 example, :func:`collections.OrderedDict` will remember the order of 356 insertion). If *object_hook* is also defined, the *object_pairs_hook* 357 takes priority. 358 359 *parse_float*, if specified, will be called with the string of every 360 JSON float to be decoded. By default, this is equivalent to 361 ``float(num_str)``. This can be used to use another datatype or parser 362 for JSON floats (e.g. :class:`decimal.Decimal`). 363 364 *parse_int*, if specified, will be called with the string of every 365 JSON int to be decoded. By default, this is equivalent to 366 ``int(num_str)``. This can be used to use another datatype or parser 367 for JSON integers (e.g. :class:`float`). 368 369 *parse_constant*, if specified, will be called with one of the 370 following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This 371 can be used to raise an exception if invalid JSON numbers are 372 encountered. 373 374 If *use_decimal* is true (default: ``False``) then it implies 375 parse_float=decimal.Decimal for parity with ``dump``. 257 376 258 377 To use a custom ``JSONDecoder`` subclass, specify it with the ``cls`` 259 378 kwarg. 
379 260 380 """ 261 if cls is None and encoding is None and object_hook is None and not kw: 381 if (cls is None and encoding is None and object_hook is None and 382 parse_int is None and parse_float is None and 383 parse_constant is None and object_pairs_hook is None 384 and not use_decimal and not kw): 262 385 return _default_decoder.decode(s) 263 386 if cls is None: … … 265 388 if object_hook is not None: 266 389 kw['object_hook'] = object_hook 390 if object_pairs_hook is not None: 391 kw['object_pairs_hook'] = object_pairs_hook 392 if parse_float is not None: 393 kw['parse_float'] = parse_float 394 if parse_int is not None: 395 kw['parse_int'] = parse_int 396 if parse_constant is not None: 397 kw['parse_constant'] = parse_constant 398 if use_decimal: 399 if parse_float is not None: 400 raise TypeError("use_decimal=True implies parse_float=Decimal") 401 kw['parse_float'] = Decimal 267 402 return cls(encoding=encoding, **kw).decode(s) 268 403 269 def read(s): 270 """ 271 json-py API compatibility hook. Use loads(s) instead. 272 """ 273 import warnings 274 warnings.warn("simplejson.loads(s) should be used instead of read(s)", 275 DeprecationWarning) 276 return loads(s) 277 278 def write(obj): 279 """ 280 json-py API compatibility hook. Use dumps(s) instead. 
281 """ 282 import warnings 283 warnings.warn("simplejson.dumps(s) should be used instead of write(s)", 284 DeprecationWarning) 285 return dumps(obj) 286 287 404 405 def _toggle_speedups(enabled): 406 import simplejson.decoder as dec 407 import simplejson.encoder as enc 408 import simplejson.scanner as scan 409 c_make_encoder = _import_c_make_encoder() 410 if enabled: 411 dec.scanstring = dec.c_scanstring or dec.py_scanstring 412 enc.c_make_encoder = c_make_encoder 413 enc.encode_basestring_ascii = (enc.c_encode_basestring_ascii or 414 enc.py_encode_basestring_ascii) 415 scan.make_scanner = scan.c_make_scanner or scan.py_make_scanner 416 else: 417 dec.scanstring = dec.py_scanstring 418 enc.c_make_encoder = None 419 enc.encode_basestring_ascii = enc.py_encode_basestring_ascii 420 scan.make_scanner = scan.py_make_scanner 421 dec.make_scanner = scan.make_scanner 422 global _default_decoder 423 _default_decoder = JSONDecoder( 424 encoding=None, 425 object_hook=None, 426 object_pairs_hook=None, 427 ) 428 global _default_encoder 429 _default_encoder = JSONEncoder( 430 skipkeys=False, 431 ensure_ascii=True, 432 check_circular=True, 433 allow_nan=True, 434 indent=None, 435 separators=None, 436 encoding='utf-8', 437 default=None, 438 ) -
trunk/Tools/Scripts/webkitpy/thirdparty/simplejson/_speedups.c
r54087 r91167 1 1 #include "Python.h" 2 #include "structmember.h" 3 #if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double) 4 #define PyOS_string_to_double json_PyOS_string_to_double 5 static double 6 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception); 7 static double 8 json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) { 9 double x; 10 assert(endptr == NULL); 11 assert(overflow_exception == NULL); 12 PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;) 13 x = PyOS_ascii_atof(s); 14 PyFPE_END_PROTECT(x) 15 return x; 16 } 17 #endif 18 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) 19 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) 20 #endif 21 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE) 22 #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size) 23 #endif 2 24 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) 3 25 typedef int Py_ssize_t; 4 26 #define PY_SSIZE_T_MAX INT_MAX 5 27 #define PY_SSIZE_T_MIN INT_MIN 28 #define PyInt_FromSsize_t PyInt_FromLong 29 #define PyInt_AsSsize_t PyInt_AsLong 6 30 #endif 31 #ifndef Py_IS_FINITE 32 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) 33 #endif 34 35 #ifdef __GNUC__ 36 #define UNUSED __attribute__((__unused__)) 37 #else 38 #define UNUSED 39 #endif 40 41 #define DEFAULT_ENCODING "utf-8" 42 43 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) 44 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) 45 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) 46 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) 47 #define Decimal_Check(op) (PyObject_TypeCheck(op, DecimalTypePtr)) 48 49 static PyTypeObject PyScannerType; 50 static PyTypeObject PyEncoderType; 51 static PyTypeObject *DecimalTypePtr; 52 53 typedef struct _PyScannerObject { 54 PyObject_HEAD 55 PyObject *encoding; 56 PyObject *strict; 57 PyObject *object_hook; 58 PyObject *pairs_hook; 59 
PyObject *parse_float; 60 PyObject *parse_int; 61 PyObject *parse_constant; 62 PyObject *memo; 63 } PyScannerObject; 64 65 static PyMemberDef scanner_members[] = { 66 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, 67 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, 68 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, 69 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, 70 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, 71 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, 72 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, 73 {NULL} 74 }; 75 76 typedef struct _PyEncoderObject { 77 PyObject_HEAD 78 PyObject *markers; 79 PyObject *defaultfn; 80 PyObject *encoder; 81 PyObject *indent; 82 PyObject *key_separator; 83 PyObject *item_separator; 84 PyObject *sort_keys; 85 PyObject *skipkeys; 86 PyObject *key_memo; 87 int fast_encode; 88 int allow_nan; 89 int use_decimal; 90 } PyEncoderObject; 91 92 static PyMemberDef encoder_members[] = { 93 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, 94 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, 95 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, 96 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, 97 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, 98 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, 99 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, 100 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, 101 {"key_memo", T_OBJECT, offsetof(PyEncoderObject, 
key_memo), READONLY, "key_memo"}, 102 {NULL} 103 }; 7 104 8 105 static Py_ssize_t … … 13 110 ascii_escape_str(PyObject *pystr); 14 111 static PyObject * 15 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);112 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); 16 113 void init_speedups(void); 17 18 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"') 114 static PyObject * 115 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); 116 static PyObject * 117 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); 118 static PyObject * 119 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); 120 static PyObject * 121 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); 122 static int 123 scanner_init(PyObject *self, PyObject *args, PyObject *kwds); 124 static void 125 scanner_dealloc(PyObject *self); 126 static int 127 scanner_clear(PyObject *self); 128 static PyObject * 129 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); 130 static int 131 encoder_init(PyObject *self, PyObject *args, PyObject *kwds); 132 static void 133 encoder_dealloc(PyObject *self); 134 static int 135 encoder_clear(PyObject *self); 136 static int 137 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); 138 static int 139 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); 140 static int 141 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); 142 static PyObject * 143 _encoded_const(PyObject *obj); 144 static void 145 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); 146 static PyObject * 147 encoder_encode_string(PyEncoderObject *s, PyObject *obj); 148 static int 149 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); 150 static PyObject * 151 
_convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); 152 static PyObject * 153 encoder_encode_float(PyEncoderObject *s, PyObject *obj); 154 155 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') 156 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) 19 157 20 158 #define MIN_EXPANSION 6 … … 25 163 #endif 26 164 165 static int 166 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) 167 { 168 /* PyObject to Py_ssize_t converter */ 169 *size_ptr = PyInt_AsSsize_t(o); 170 if (*size_ptr == -1 && PyErr_Occurred()) 171 return 0; 172 return 1; 173 } 174 175 static PyObject * 176 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) 177 { 178 /* Py_ssize_t to PyObject converter */ 179 return PyInt_FromSsize_t(*size_ptr); 180 } 181 27 182 static Py_ssize_t 28 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) { 29 Py_UNICODE x; 183 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) 184 { 185 /* Escape unicode code point c to ASCII escape sequences 186 in char *output. output must have at least 12 bytes unused to 187 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ 30 188 output[chars++] = '\\'; 31 189 switch (c) { 32 case '/': output[chars++] = (char)c; break;33 190 case '\\': output[chars++] = (char)c; break; 34 191 case '"': output[chars++] = (char)c; break; … … 45 202 c = 0xd800 | ((v >> 10) & 0x3ff); 46 203 output[chars++] = 'u'; 47 x = (c & 0xf000) >> 12; 48 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 49 x = (c & 0x0f00) >> 8; 50 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 51 x = (c & 0x00f0) >> 4; 52 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 53 x = (c & 0x000f); 54 output[chars++] = (x < 10) ? 
'0' + x : 'a' + (x - 10); 204 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; 205 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; 206 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; 207 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; 55 208 c = 0xdc00 | (v & 0x3ff); 56 209 output[chars++] = '\\'; … … 58 211 #endif 59 212 output[chars++] = 'u'; 60 x = (c & 0xf000) >> 12; 61 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 62 x = (c & 0x0f00) >> 8; 63 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 64 x = (c & 0x00f0) >> 4; 65 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 66 x = (c & 0x000f); 67 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 213 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; 214 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; 215 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; 216 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; 68 217 } 69 218 return chars; … … 71 220 72 221 static PyObject * 73 ascii_escape_unicode(PyObject *pystr) { 222 ascii_escape_unicode(PyObject *pystr) 223 { 224 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ 74 225 Py_ssize_t i; 75 226 Py_ssize_t input_chars; 76 227 Py_ssize_t output_size; 228 Py_ssize_t max_output_size; 77 229 Py_ssize_t chars; 78 230 PyObject *rval; … … 82 234 input_chars = PyUnicode_GET_SIZE(pystr); 83 235 input_unicode = PyUnicode_AS_UNICODE(pystr); 236 84 237 /* One char input can be up to 6 chars output, estimate 4 of these */ 85 238 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 239 max_output_size = 2 + (input_chars * MAX_EXPANSION); 86 240 rval = PyString_FromStringAndSize(NULL, output_size); 87 241 if (rval == NULL) { … … 95 249 if (S_CHAR(c)) { 96 250 output[chars++] = (char)c; 97 } else { 251 } 252 else { 98 253 chars = ascii_escape_char(c, output, chars); 99 254 } 100 255 if (output_size - chars < (1 + MAX_EXPANSION)) { 101 256 /* There's more than four, so let's resize by a lot */ 102 
output_size *=2;257 Py_ssize_t new_output_size = output_size * 2; 103 258 /* This is an upper bound */ 104 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { 105 output_size = 2 + (input_chars * MAX_EXPANSION); 106 } 107 if (_PyString_Resize(&rval, output_size) == -1) { 108 return NULL; 109 } 110 output = PyString_AS_STRING(rval); 259 if (new_output_size > max_output_size) { 260 new_output_size = max_output_size; 261 } 262 /* Make sure that the output size changed before resizing */ 263 if (new_output_size != output_size) { 264 output_size = new_output_size; 265 if (_PyString_Resize(&rval, output_size) == -1) { 266 return NULL; 267 } 268 output = PyString_AS_STRING(rval); 269 } 111 270 } 112 271 } … … 119 278 120 279 static PyObject * 121 ascii_escape_str(PyObject *pystr) { 280 ascii_escape_str(PyObject *pystr) 281 { 282 /* Take a PyString pystr and return a new ASCII-only escaped PyString */ 122 283 Py_ssize_t i; 123 284 Py_ssize_t input_chars; … … 130 291 input_chars = PyString_GET_SIZE(pystr); 131 292 input_str = PyString_AS_STRING(pystr); 132 /* One char input can be up to 6 chars output, estimate 4 of these */ 133 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 293 294 /* Fast path for a string that's already ASCII */ 295 for (i = 0; i < input_chars; i++) { 296 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; 297 if (!S_CHAR(c)) { 298 /* If we have to escape something, scan the string for unicode */ 299 Py_ssize_t j; 300 for (j = i; j < input_chars; j++) { 301 c = (Py_UNICODE)(unsigned char)input_str[j]; 302 if (c > 0x7f) { 303 /* We hit a non-ASCII character, bail to unicode mode */ 304 PyObject *uni; 305 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); 306 if (uni == NULL) { 307 return NULL; 308 } 309 rval = ascii_escape_unicode(uni); 310 Py_DECREF(uni); 311 return rval; 312 } 313 } 314 break; 315 } 316 } 317 318 if (i == input_chars) { 319 /* Input is already ASCII */ 320 output_size = 2 + input_chars; 321 } 322 else { 323 /* 
One char input can be up to 6 chars output, estimate 4 of these */ 324 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 325 } 134 326 rval = PyString_FromStringAndSize(NULL, output_size); 135 327 if (rval == NULL) { … … 137 329 } 138 330 output = PyString_AS_STRING(rval); 139 chars = 0; 140 output[chars++] = '"'; 141 for (i = 0; i < input_chars; i++) { 142 Py_UNICODE c = (Py_UNICODE)input_str[i]; 331 output[0] = '"'; 332 333 /* We know that everything up to i is ASCII already */ 334 chars = i + 1; 335 memcpy(&output[1], input_str, i); 336 337 for (; i < input_chars; i++) { 338 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; 143 339 if (S_CHAR(c)) { 144 340 output[chars++] = (char)c; 145 } else if (c > 0x7F) { 146 /* We hit a non-ASCII character, bail to unicode mode */ 147 PyObject *uni; 148 Py_DECREF(rval); 149 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); 150 if (uni == NULL) { 151 return NULL; 152 } 153 rval = ascii_escape_unicode(uni); 154 Py_DECREF(uni); 155 return rval; 156 } else { 341 } 342 else { 157 343 chars = ascii_escape_char(c, output, chars); 158 344 } … … 177 363 } 178 364 365 static void 366 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) 367 { 368 /* Use the Python function simplejson.decoder.errmsg to raise a nice 369 looking ValueError exception */ 370 static PyObject *JSONDecodeError = NULL; 371 PyObject *exc; 372 if (JSONDecodeError == NULL) { 373 PyObject *decoder = PyImport_ImportModule("simplejson.decoder"); 374 if (decoder == NULL) 375 return; 376 JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError"); 377 Py_DECREF(decoder); 378 if (JSONDecodeError == NULL) 379 return; 380 } 381 exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); 382 if (exc) { 383 PyErr_SetObject(JSONDecodeError, exc); 384 Py_DECREF(exc); 385 } 386 } 387 388 static PyObject * 389 join_list_unicode(PyObject *lst) 390 { 391 /* return u''.join(lst) */ 392 static PyObject *joinfn = 
NULL; 393 if (joinfn == NULL) { 394 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); 395 if (ustr == NULL) 396 return NULL; 397 398 joinfn = PyObject_GetAttrString(ustr, "join"); 399 Py_DECREF(ustr); 400 if (joinfn == NULL) 401 return NULL; 402 } 403 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); 404 } 405 406 static PyObject * 407 join_list_string(PyObject *lst) 408 { 409 /* return ''.join(lst) */ 410 static PyObject *joinfn = NULL; 411 if (joinfn == NULL) { 412 PyObject *ustr = PyString_FromStringAndSize(NULL, 0); 413 if (ustr == NULL) 414 return NULL; 415 416 joinfn = PyObject_GetAttrString(ustr, "join"); 417 Py_DECREF(ustr); 418 if (joinfn == NULL) 419 return NULL; 420 } 421 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); 422 } 423 424 static PyObject * 425 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { 426 /* return (rval, idx) tuple, stealing reference to rval */ 427 PyObject *tpl; 428 PyObject *pyidx; 429 /* 430 steal a reference to rval, returns (rval, idx) 431 */ 432 if (rval == NULL) { 433 return NULL; 434 } 435 pyidx = PyInt_FromSsize_t(idx); 436 if (pyidx == NULL) { 437 Py_DECREF(rval); 438 return NULL; 439 } 440 tpl = PyTuple_New(2); 441 if (tpl == NULL) { 442 Py_DECREF(pyidx); 443 Py_DECREF(rval); 444 return NULL; 445 } 446 PyTuple_SET_ITEM(tpl, 0, rval); 447 PyTuple_SET_ITEM(tpl, 1, pyidx); 448 return tpl; 449 } 450 451 #define APPEND_OLD_CHUNK \ 452 if (chunk != NULL) { \ 453 if (chunks == NULL) { \ 454 chunks = PyList_New(0); \ 455 if (chunks == NULL) { \ 456 goto bail; \ 457 } \ 458 } \ 459 if (PyList_Append(chunks, chunk)) { \ 460 goto bail; \ 461 } \ 462 Py_CLEAR(chunk); \ 463 } 464 465 static PyObject * 466 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) 467 { 468 /* Read the JSON string from PyString pystr. 469 end is the index of the first character after the quote. 
470 encoding is the encoding of pystr (must be an ASCII superset) 471 if strict is zero then literal control characters are allowed 472 *next_end_ptr is a return-by-reference index of the character 473 after the end quote 474 475 Return value is a new PyString (if ASCII-only) or PyUnicode 476 */ 477 PyObject *rval; 478 Py_ssize_t len = PyString_GET_SIZE(pystr); 479 Py_ssize_t begin = end - 1; 480 Py_ssize_t next = begin; 481 int has_unicode = 0; 482 char *buf = PyString_AS_STRING(pystr); 483 PyObject *chunks = NULL; 484 PyObject *chunk = NULL; 485 486 if (end < 0 || len <= end) { 487 PyErr_SetString(PyExc_ValueError, "end is out of bounds"); 488 goto bail; 489 } 490 while (1) { 491 /* Find the end of the string or the next escape */ 492 Py_UNICODE c = 0; 493 for (next = end; next < len; next++) { 494 c = (unsigned char)buf[next]; 495 if (c == '"' || c == '\\') { 496 break; 497 } 498 else if (strict && c <= 0x1f) { 499 raise_errmsg("Invalid control character at", pystr, next); 500 goto bail; 501 } 502 else if (c > 0x7f) { 503 has_unicode = 1; 504 } 505 } 506 if (!(c == '"' || c == '\\')) { 507 raise_errmsg("Unterminated string starting at", pystr, begin); 508 goto bail; 509 } 510 /* Pick up this chunk if it's not zero length */ 511 if (next != end) { 512 PyObject *strchunk; 513 APPEND_OLD_CHUNK 514 strchunk = PyString_FromStringAndSize(&buf[end], next - end); 515 if (strchunk == NULL) { 516 goto bail; 517 } 518 if (has_unicode) { 519 chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL); 520 Py_DECREF(strchunk); 521 if (chunk == NULL) { 522 goto bail; 523 } 524 } 525 else { 526 chunk = strchunk; 527 } 528 } 529 next++; 530 if (c == '"') { 531 end = next; 532 break; 533 } 534 if (next == len) { 535 raise_errmsg("Unterminated string starting at", pystr, begin); 536 goto bail; 537 } 538 c = buf[next]; 539 if (c != 'u') { 540 /* Non-unicode backslash escapes */ 541 end = next + 1; 542 switch (c) { 543 case '"': break; 544 case '\\': break; 545 case '/': break; 
546 case 'b': c = '\b'; break; 547 case 'f': c = '\f'; break; 548 case 'n': c = '\n'; break; 549 case 'r': c = '\r'; break; 550 case 't': c = '\t'; break; 551 default: c = 0; 552 } 553 if (c == 0) { 554 raise_errmsg("Invalid \\escape", pystr, end - 2); 555 goto bail; 556 } 557 } 558 else { 559 c = 0; 560 next++; 561 end = next + 4; 562 if (end >= len) { 563 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1); 564 goto bail; 565 } 566 /* Decode 4 hex digits */ 567 for (; next < end; next++) { 568 Py_UNICODE digit = buf[next]; 569 c <<= 4; 570 switch (digit) { 571 case '0': case '1': case '2': case '3': case '4': 572 case '5': case '6': case '7': case '8': case '9': 573 c |= (digit - '0'); break; 574 case 'a': case 'b': case 'c': case 'd': case 'e': 575 case 'f': 576 c |= (digit - 'a' + 10); break; 577 case 'A': case 'B': case 'C': case 'D': case 'E': 578 case 'F': 579 c |= (digit - 'A' + 10); break; 580 default: 581 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 582 goto bail; 583 } 584 } 585 #ifdef Py_UNICODE_WIDE 586 /* Surrogate pair */ 587 if ((c & 0xfc00) == 0xd800) { 588 Py_UNICODE c2 = 0; 589 if (end + 6 >= len) { 590 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 591 goto bail; 592 } 593 if (buf[next++] != '\\' || buf[next++] != 'u') { 594 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 595 goto bail; 596 } 597 end += 6; 598 /* Decode 4 hex digits */ 599 for (; next < end; next++) { 600 c2 <<= 4; 601 Py_UNICODE digit = buf[next]; 602 switch (digit) { 603 case '0': case '1': case '2': case '3': case '4': 604 case '5': case '6': case '7': case '8': case '9': 605 c2 |= (digit - '0'); break; 606 case 'a': case 'b': case 'c': case 'd': case 'e': 607 case 'f': 608 c2 |= (digit - 'a' + 10); break; 609 case 'A': case 'B': case 'C': case 'D': case 'E': 610 case 'F': 611 c2 |= (digit - 'A' + 10); break; 612 default: 613 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 614 goto bail; 615 } 616 } 617 if ((c2 & 0xfc00) != 0xdc00) { 
618 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 619 goto bail; 620 } 621 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); 622 } 623 else if ((c & 0xfc00) == 0xdc00) { 624 raise_errmsg("Unpaired low surrogate", pystr, end - 5); 625 goto bail; 626 } 627 #endif 628 } 629 if (c > 0x7f) { 630 has_unicode = 1; 631 } 632 APPEND_OLD_CHUNK 633 if (has_unicode) { 634 chunk = PyUnicode_FromUnicode(&c, 1); 635 if (chunk == NULL) { 636 goto bail; 637 } 638 } 639 else { 640 char c_char = Py_CHARMASK(c); 641 chunk = PyString_FromStringAndSize(&c_char, 1); 642 if (chunk == NULL) { 643 goto bail; 644 } 645 } 646 } 647 648 if (chunks == NULL) { 649 if (chunk != NULL) 650 rval = chunk; 651 else 652 rval = PyString_FromStringAndSize("", 0); 653 } 654 else { 655 APPEND_OLD_CHUNK 656 rval = join_list_string(chunks); 657 if (rval == NULL) { 658 goto bail; 659 } 660 Py_CLEAR(chunks); 661 } 662 663 *next_end_ptr = end; 664 return rval; 665 bail: 666 *next_end_ptr = -1; 667 Py_XDECREF(chunk); 668 Py_XDECREF(chunks); 669 return NULL; 670 } 671 672 673 static PyObject * 674 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) 675 { 676 /* Read the JSON string from PyUnicode pystr. 677 end is the index of the first character after the quote. 
678 if strict is zero then literal control characters are allowed 679 *next_end_ptr is a return-by-reference index of the character 680 after the end quote 681 682 Return value is a new PyUnicode 683 */ 684 PyObject *rval; 685 Py_ssize_t len = PyUnicode_GET_SIZE(pystr); 686 Py_ssize_t begin = end - 1; 687 Py_ssize_t next = begin; 688 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); 689 PyObject *chunks = NULL; 690 PyObject *chunk = NULL; 691 692 if (end < 0 || len <= end) { 693 PyErr_SetString(PyExc_ValueError, "end is out of bounds"); 694 goto bail; 695 } 696 while (1) { 697 /* Find the end of the string or the next escape */ 698 Py_UNICODE c = 0; 699 for (next = end; next < len; next++) { 700 c = buf[next]; 701 if (c == '"' || c == '\\') { 702 break; 703 } 704 else if (strict && c <= 0x1f) { 705 raise_errmsg("Invalid control character at", pystr, next); 706 goto bail; 707 } 708 } 709 if (!(c == '"' || c == '\\')) { 710 raise_errmsg("Unterminated string starting at", pystr, begin); 711 goto bail; 712 } 713 /* Pick up this chunk if it's not zero length */ 714 if (next != end) { 715 APPEND_OLD_CHUNK 716 chunk = PyUnicode_FromUnicode(&buf[end], next - end); 717 if (chunk == NULL) { 718 goto bail; 719 } 720 } 721 next++; 722 if (c == '"') { 723 end = next; 724 break; 725 } 726 if (next == len) { 727 raise_errmsg("Unterminated string starting at", pystr, begin); 728 goto bail; 729 } 730 c = buf[next]; 731 if (c != 'u') { 732 /* Non-unicode backslash escapes */ 733 end = next + 1; 734 switch (c) { 735 case '"': break; 736 case '\\': break; 737 case '/': break; 738 case 'b': c = '\b'; break; 739 case 'f': c = '\f'; break; 740 case 'n': c = '\n'; break; 741 case 'r': c = '\r'; break; 742 case 't': c = '\t'; break; 743 default: c = 0; 744 } 745 if (c == 0) { 746 raise_errmsg("Invalid \\escape", pystr, end - 2); 747 goto bail; 748 } 749 } 750 else { 751 c = 0; 752 next++; 753 end = next + 4; 754 if (end >= len) { 755 raise_errmsg("Invalid \\uXXXX escape", pystr, next - 
1); 756 goto bail; 757 } 758 /* Decode 4 hex digits */ 759 for (; next < end; next++) { 760 Py_UNICODE digit = buf[next]; 761 c <<= 4; 762 switch (digit) { 763 case '0': case '1': case '2': case '3': case '4': 764 case '5': case '6': case '7': case '8': case '9': 765 c |= (digit - '0'); break; 766 case 'a': case 'b': case 'c': case 'd': case 'e': 767 case 'f': 768 c |= (digit - 'a' + 10); break; 769 case 'A': case 'B': case 'C': case 'D': case 'E': 770 case 'F': 771 c |= (digit - 'A' + 10); break; 772 default: 773 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 774 goto bail; 775 } 776 } 777 #ifdef Py_UNICODE_WIDE 778 /* Surrogate pair */ 779 if ((c & 0xfc00) == 0xd800) { 780 Py_UNICODE c2 = 0; 781 if (end + 6 >= len) { 782 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 783 goto bail; 784 } 785 if (buf[next++] != '\\' || buf[next++] != 'u') { 786 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 787 goto bail; 788 } 789 end += 6; 790 /* Decode 4 hex digits */ 791 for (; next < end; next++) { 792 c2 <<= 4; 793 Py_UNICODE digit = buf[next]; 794 switch (digit) { 795 case '0': case '1': case '2': case '3': case '4': 796 case '5': case '6': case '7': case '8': case '9': 797 c2 |= (digit - '0'); break; 798 case 'a': case 'b': case 'c': case 'd': case 'e': 799 case 'f': 800 c2 |= (digit - 'a' + 10); break; 801 case 'A': case 'B': case 'C': case 'D': case 'E': 802 case 'F': 803 c2 |= (digit - 'A' + 10); break; 804 default: 805 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); 806 goto bail; 807 } 808 } 809 if ((c2 & 0xfc00) != 0xdc00) { 810 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 811 goto bail; 812 } 813 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); 814 } 815 else if ((c & 0xfc00) == 0xdc00) { 816 raise_errmsg("Unpaired low surrogate", pystr, end - 5); 817 goto bail; 818 } 819 #endif 820 } 821 APPEND_OLD_CHUNK 822 chunk = PyUnicode_FromUnicode(&c, 1); 823 if (chunk == NULL) { 824 goto bail; 825 } 826 } 827 828 if 
(chunks == NULL) { 829 if (chunk != NULL) 830 rval = chunk; 831 else 832 rval = PyUnicode_FromUnicode(NULL, 0); 833 } 834 else { 835 APPEND_OLD_CHUNK 836 rval = join_list_unicode(chunks); 837 if (rval == NULL) { 838 goto bail; 839 } 840 Py_CLEAR(chunks); 841 } 842 *next_end_ptr = end; 843 return rval; 844 bail: 845 *next_end_ptr = -1; 846 Py_XDECREF(chunk); 847 Py_XDECREF(chunks); 848 return NULL; 849 } 850 851 PyDoc_STRVAR(pydoc_scanstring, 852 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" 853 "\n" 854 "Scan the string s for a JSON string. End is the index of the\n" 855 "character in s after the quote that started the JSON string.\n" 856 "Unescapes all valid JSON string escape sequences and raises ValueError\n" 857 "on attempt to decode an invalid string. If strict is False then literal\n" 858 "control characters are allowed in the string.\n" 859 "\n" 860 "Returns a tuple of the decoded string and the index of the character in s\n" 861 "after the end quote." 862 ); 863 864 static PyObject * 865 py_scanstring(PyObject* self UNUSED, PyObject *args) 866 { 867 PyObject *pystr; 868 PyObject *rval; 869 Py_ssize_t end; 870 Py_ssize_t next_end = -1; 871 char *encoding = NULL; 872 int strict = 1; 873 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { 874 return NULL; 875 } 876 if (encoding == NULL) { 877 encoding = DEFAULT_ENCODING; 878 } 879 if (PyString_Check(pystr)) { 880 rval = scanstring_str(pystr, end, encoding, strict, &next_end); 881 } 882 else if (PyUnicode_Check(pystr)) { 883 rval = scanstring_unicode(pystr, end, strict, &next_end); 884 } 885 else { 886 PyErr_Format(PyExc_TypeError, 887 "first argument must be a string, not %.80s", 888 Py_TYPE(pystr)->tp_name); 889 return NULL; 890 } 891 return _build_rval_index_tuple(rval, next_end); 892 } 893 179 894 PyDoc_STRVAR(pydoc_encode_basestring_ascii, 180 895 "encode_basestring_ascii(basestring) -> str\n" 181 896 "\n" 182 " ..."897 
"Return an ASCII-only JSON representation of a Python string" 183 898 ); 184 899 185 900 static PyObject * 186 py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) { 901 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) 902 { 903 /* Return an ASCII-only JSON representation of a Python string */ 187 904 /* METH_O */ 188 905 if (PyString_Check(pystr)) { 189 906 return ascii_escape_str(pystr); 190 } else if (PyUnicode_Check(pystr)) { 907 } 908 else if (PyUnicode_Check(pystr)) { 191 909 return ascii_escape_unicode(pystr); 192 910 } 193 PyErr_SetString(PyExc_TypeError, "first argument must be a string"); 911 else { 912 PyErr_Format(PyExc_TypeError, 913 "first argument must be a string, not %.80s", 914 Py_TYPE(pystr)->tp_name); 915 return NULL; 916 } 917 } 918 919 static void 920 scanner_dealloc(PyObject *self) 921 { 922 /* Deallocate scanner object */ 923 scanner_clear(self); 924 Py_TYPE(self)->tp_free(self); 925 } 926 927 static int 928 scanner_traverse(PyObject *self, visitproc visit, void *arg) 929 { 930 PyScannerObject *s; 931 assert(PyScanner_Check(self)); 932 s = (PyScannerObject *)self; 933 Py_VISIT(s->encoding); 934 Py_VISIT(s->strict); 935 Py_VISIT(s->object_hook); 936 Py_VISIT(s->pairs_hook); 937 Py_VISIT(s->parse_float); 938 Py_VISIT(s->parse_int); 939 Py_VISIT(s->parse_constant); 940 Py_VISIT(s->memo); 941 return 0; 942 } 943 944 static int 945 scanner_clear(PyObject *self) 946 { 947 PyScannerObject *s; 948 assert(PyScanner_Check(self)); 949 s = (PyScannerObject *)self; 950 Py_CLEAR(s->encoding); 951 Py_CLEAR(s->strict); 952 Py_CLEAR(s->object_hook); 953 Py_CLEAR(s->pairs_hook); 954 Py_CLEAR(s->parse_float); 955 Py_CLEAR(s->parse_int); 956 Py_CLEAR(s->parse_constant); 957 Py_CLEAR(s->memo); 958 return 0; 959 } 960 961 static PyObject * 962 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 963 /* Read a JSON object from PyString pystr. 
964 idx is the index of the first character after the opening curly brace. 965 *next_idx_ptr is a return-by-reference index to the first character after 966 the closing curly brace. 967 968 Returns a new PyObject (usually a dict, but object_hook or 969 object_pairs_hook can change that) 970 */ 971 char *str = PyString_AS_STRING(pystr); 972 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; 973 PyObject *rval = NULL; 974 PyObject *pairs = NULL; 975 PyObject *item; 976 PyObject *key = NULL; 977 PyObject *val = NULL; 978 char *encoding = PyString_AS_STRING(s->encoding); 979 int strict = PyObject_IsTrue(s->strict); 980 int has_pairs_hook = (s->pairs_hook != Py_None); 981 Py_ssize_t next_idx; 982 if (has_pairs_hook) { 983 pairs = PyList_New(0); 984 if (pairs == NULL) 985 return NULL; 986 } 987 else { 988 rval = PyDict_New(); 989 if (rval == NULL) 990 return NULL; 991 } 992 993 /* skip whitespace after { */ 994 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 995 996 /* only loop if the object is non-empty */ 997 if (idx <= end_idx && str[idx] != '}') { 998 while (idx <= end_idx) { 999 PyObject *memokey; 1000 1001 /* read key */ 1002 if (str[idx] != '"') { 1003 raise_errmsg("Expecting property name", pystr, idx); 1004 goto bail; 1005 } 1006 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); 1007 if (key == NULL) 1008 goto bail; 1009 memokey = PyDict_GetItem(s->memo, key); 1010 if (memokey != NULL) { 1011 Py_INCREF(memokey); 1012 Py_DECREF(key); 1013 key = memokey; 1014 } 1015 else { 1016 if (PyDict_SetItem(s->memo, key, key) < 0) 1017 goto bail; 1018 } 1019 idx = next_idx; 1020 1021 /* skip whitespace between key and : delimiter, read :, skip whitespace */ 1022 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1023 if (idx > end_idx || str[idx] != ':') { 1024 raise_errmsg("Expecting : delimiter", pystr, idx); 1025 goto bail; 1026 } 1027 idx++; 1028 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1029 1030 /* read any JSON data type 
*/ 1031 val = scan_once_str(s, pystr, idx, &next_idx); 1032 if (val == NULL) 1033 goto bail; 1034 1035 if (has_pairs_hook) { 1036 item = PyTuple_Pack(2, key, val); 1037 if (item == NULL) 1038 goto bail; 1039 Py_CLEAR(key); 1040 Py_CLEAR(val); 1041 if (PyList_Append(pairs, item) == -1) { 1042 Py_DECREF(item); 1043 goto bail; 1044 } 1045 Py_DECREF(item); 1046 } 1047 else { 1048 if (PyDict_SetItem(rval, key, val) < 0) 1049 goto bail; 1050 Py_CLEAR(key); 1051 Py_CLEAR(val); 1052 } 1053 idx = next_idx; 1054 1055 /* skip whitespace before } or , */ 1056 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1057 1058 /* bail if the object is closed or we didn't get the , delimiter */ 1059 if (idx > end_idx) break; 1060 if (str[idx] == '}') { 1061 break; 1062 } 1063 else if (str[idx] != ',') { 1064 raise_errmsg("Expecting , delimiter", pystr, idx); 1065 goto bail; 1066 } 1067 idx++; 1068 1069 /* skip whitespace after , delimiter */ 1070 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1071 } 1072 } 1073 /* verify that idx < end_idx, str[idx] should be '}' */ 1074 if (idx > end_idx || str[idx] != '}') { 1075 raise_errmsg("Expecting object", pystr, end_idx); 1076 goto bail; 1077 } 1078 1079 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ 1080 if (s->pairs_hook != Py_None) { 1081 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); 1082 if (val == NULL) 1083 goto bail; 1084 Py_DECREF(pairs); 1085 *next_idx_ptr = idx + 1; 1086 return val; 1087 } 1088 1089 /* if object_hook is not None: rval = object_hook(rval) */ 1090 if (s->object_hook != Py_None) { 1091 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); 1092 if (val == NULL) 1093 goto bail; 1094 Py_DECREF(rval); 1095 rval = val; 1096 val = NULL; 1097 } 1098 *next_idx_ptr = idx + 1; 1099 return rval; 1100 bail: 1101 Py_XDECREF(rval); 1102 Py_XDECREF(key); 1103 Py_XDECREF(val); 1104 Py_XDECREF(pairs); 194 1105 return NULL; 195 1106 } 196 1107 197 #define DEFN(n, k) \ 198 { \ 
199 #n, \ 200 (PyCFunction)py_ ##n, \ 201 k, \ 202 pydoc_ ##n \ 203 } 1108 static PyObject * 1109 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1110 /* Read a JSON object from PyUnicode pystr. 1111 idx is the index of the first character after the opening curly brace. 1112 *next_idx_ptr is a return-by-reference index to the first character after 1113 the closing curly brace. 1114 1115 Returns a new PyObject (usually a dict, but object_hook can change that) 1116 */ 1117 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1118 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; 1119 PyObject *rval = NULL; 1120 PyObject *pairs = NULL; 1121 PyObject *item; 1122 PyObject *key = NULL; 1123 PyObject *val = NULL; 1124 int strict = PyObject_IsTrue(s->strict); 1125 int has_pairs_hook = (s->pairs_hook != Py_None); 1126 Py_ssize_t next_idx; 1127 1128 if (has_pairs_hook) { 1129 pairs = PyList_New(0); 1130 if (pairs == NULL) 1131 return NULL; 1132 } 1133 else { 1134 rval = PyDict_New(); 1135 if (rval == NULL) 1136 return NULL; 1137 } 1138 1139 /* skip whitespace after { */ 1140 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1141 1142 /* only loop if the object is non-empty */ 1143 if (idx <= end_idx && str[idx] != '}') { 1144 while (idx <= end_idx) { 1145 PyObject *memokey; 1146 1147 /* read key */ 1148 if (str[idx] != '"') { 1149 raise_errmsg("Expecting property name", pystr, idx); 1150 goto bail; 1151 } 1152 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); 1153 if (key == NULL) 1154 goto bail; 1155 memokey = PyDict_GetItem(s->memo, key); 1156 if (memokey != NULL) { 1157 Py_INCREF(memokey); 1158 Py_DECREF(key); 1159 key = memokey; 1160 } 1161 else { 1162 if (PyDict_SetItem(s->memo, key, key) < 0) 1163 goto bail; 1164 } 1165 idx = next_idx; 1166 1167 /* skip whitespace between key and : delimiter, read :, skip whitespace */ 1168 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1169 if (idx > 
end_idx || str[idx] != ':') { 1170 raise_errmsg("Expecting : delimiter", pystr, idx); 1171 goto bail; 1172 } 1173 idx++; 1174 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1175 1176 /* read any JSON term */ 1177 val = scan_once_unicode(s, pystr, idx, &next_idx); 1178 if (val == NULL) 1179 goto bail; 1180 1181 if (has_pairs_hook) { 1182 item = PyTuple_Pack(2, key, val); 1183 if (item == NULL) 1184 goto bail; 1185 Py_CLEAR(key); 1186 Py_CLEAR(val); 1187 if (PyList_Append(pairs, item) == -1) { 1188 Py_DECREF(item); 1189 goto bail; 1190 } 1191 Py_DECREF(item); 1192 } 1193 else { 1194 if (PyDict_SetItem(rval, key, val) < 0) 1195 goto bail; 1196 Py_CLEAR(key); 1197 Py_CLEAR(val); 1198 } 1199 idx = next_idx; 1200 1201 /* skip whitespace before } or , */ 1202 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1203 1204 /* bail if the object is closed or we didn't get the , delimiter */ 1205 if (idx > end_idx) break; 1206 if (str[idx] == '}') { 1207 break; 1208 } 1209 else if (str[idx] != ',') { 1210 raise_errmsg("Expecting , delimiter", pystr, idx); 1211 goto bail; 1212 } 1213 idx++; 1214 1215 /* skip whitespace after , delimiter */ 1216 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1217 } 1218 } 1219 1220 /* verify that idx < end_idx, str[idx] should be '}' */ 1221 if (idx > end_idx || str[idx] != '}') { 1222 raise_errmsg("Expecting object", pystr, end_idx); 1223 goto bail; 1224 } 1225 1226 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ 1227 if (s->pairs_hook != Py_None) { 1228 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); 1229 if (val == NULL) 1230 goto bail; 1231 Py_DECREF(pairs); 1232 *next_idx_ptr = idx + 1; 1233 return val; 1234 } 1235 1236 /* if object_hook is not None: rval = object_hook(rval) */ 1237 if (s->object_hook != Py_None) { 1238 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); 1239 if (val == NULL) 1240 goto bail; 1241 Py_DECREF(rval); 1242 rval = val; 1243 val = NULL; 1244 } 
1245 *next_idx_ptr = idx + 1; 1246 return rval; 1247 bail: 1248 Py_XDECREF(rval); 1249 Py_XDECREF(key); 1250 Py_XDECREF(val); 1251 Py_XDECREF(pairs); 1252 return NULL; 1253 } 1254 1255 static PyObject * 1256 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1257 /* Read a JSON array from PyString pystr. 1258 idx is the index of the first character after the opening brace. 1259 *next_idx_ptr is a return-by-reference index to the first character after 1260 the closing brace. 1261 1262 Returns a new PyList 1263 */ 1264 char *str = PyString_AS_STRING(pystr); 1265 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; 1266 PyObject *val = NULL; 1267 PyObject *rval = PyList_New(0); 1268 Py_ssize_t next_idx; 1269 if (rval == NULL) 1270 return NULL; 1271 1272 /* skip whitespace after [ */ 1273 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1274 1275 /* only loop if the array is non-empty */ 1276 if (idx <= end_idx && str[idx] != ']') { 1277 while (idx <= end_idx) { 1278 1279 /* read any JSON term and de-tuplefy the (rval, idx) */ 1280 val = scan_once_str(s, pystr, idx, &next_idx); 1281 if (val == NULL) { 1282 if (PyErr_ExceptionMatches(PyExc_StopIteration)) { 1283 PyErr_Clear(); 1284 raise_errmsg("Expecting object", pystr, idx); 1285 } 1286 goto bail; 1287 } 1288 1289 if (PyList_Append(rval, val) == -1) 1290 goto bail; 1291 1292 Py_CLEAR(val); 1293 idx = next_idx; 1294 1295 /* skip whitespace between term and , */ 1296 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1297 1298 /* bail if the array is closed or we didn't get the , delimiter */ 1299 if (idx > end_idx) break; 1300 if (str[idx] == ']') { 1301 break; 1302 } 1303 else if (str[idx] != ',') { 1304 raise_errmsg("Expecting , delimiter", pystr, idx); 1305 goto bail; 1306 } 1307 idx++; 1308 1309 /* skip whitespace after , */ 1310 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1311 } 1312 } 1313 1314 /* verify that idx < end_idx, str[idx] should be 
']' */ 1315 if (idx > end_idx || str[idx] != ']') { 1316 raise_errmsg("Expecting object", pystr, end_idx); 1317 goto bail; 1318 } 1319 *next_idx_ptr = idx + 1; 1320 return rval; 1321 bail: 1322 Py_XDECREF(val); 1323 Py_DECREF(rval); 1324 return NULL; 1325 } 1326 1327 static PyObject * 1328 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1329 /* Read a JSON array from PyString pystr. 1330 idx is the index of the first character after the opening brace. 1331 *next_idx_ptr is a return-by-reference index to the first character after 1332 the closing brace. 1333 1334 Returns a new PyList 1335 */ 1336 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1337 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; 1338 PyObject *val = NULL; 1339 PyObject *rval = PyList_New(0); 1340 Py_ssize_t next_idx; 1341 if (rval == NULL) 1342 return NULL; 1343 1344 /* skip whitespace after [ */ 1345 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1346 1347 /* only loop if the array is non-empty */ 1348 if (idx <= end_idx && str[idx] != ']') { 1349 while (idx <= end_idx) { 1350 1351 /* read any JSON term */ 1352 val = scan_once_unicode(s, pystr, idx, &next_idx); 1353 if (val == NULL) { 1354 if (PyErr_ExceptionMatches(PyExc_StopIteration)) { 1355 PyErr_Clear(); 1356 raise_errmsg("Expecting object", pystr, idx); 1357 } 1358 goto bail; 1359 } 1360 1361 if (PyList_Append(rval, val) == -1) 1362 goto bail; 1363 1364 Py_CLEAR(val); 1365 idx = next_idx; 1366 1367 /* skip whitespace between term and , */ 1368 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1369 1370 /* bail if the array is closed or we didn't get the , delimiter */ 1371 if (idx > end_idx) break; 1372 if (str[idx] == ']') { 1373 break; 1374 } 1375 else if (str[idx] != ',') { 1376 raise_errmsg("Expecting , delimiter", pystr, idx); 1377 goto bail; 1378 } 1379 idx++; 1380 1381 /* skip whitespace after , */ 1382 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 
1383 } 1384 } 1385 1386 /* verify that idx < end_idx, str[idx] should be ']' */ 1387 if (idx > end_idx || str[idx] != ']') { 1388 raise_errmsg("Expecting object", pystr, end_idx); 1389 goto bail; 1390 } 1391 *next_idx_ptr = idx + 1; 1392 return rval; 1393 bail: 1394 Py_XDECREF(val); 1395 Py_DECREF(rval); 1396 return NULL; 1397 } 1398 1399 static PyObject * 1400 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1401 /* Read a JSON constant from PyString pystr. 1402 constant is the constant string that was found 1403 ("NaN", "Infinity", "-Infinity"). 1404 idx is the index of the first character of the constant 1405 *next_idx_ptr is a return-by-reference index to the first character after 1406 the constant. 1407 1408 Returns the result of parse_constant 1409 */ 1410 PyObject *cstr; 1411 PyObject *rval; 1412 /* constant is "NaN", "Infinity", or "-Infinity" */ 1413 cstr = PyString_InternFromString(constant); 1414 if (cstr == NULL) 1415 return NULL; 1416 1417 /* rval = parse_constant(constant) */ 1418 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); 1419 idx += PyString_GET_SIZE(cstr); 1420 Py_DECREF(cstr); 1421 *next_idx_ptr = idx; 1422 return rval; 1423 } 1424 1425 static PyObject * 1426 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { 1427 /* Read a JSON number from PyString pystr. 1428 idx is the index of the first character of the number 1429 *next_idx_ptr is a return-by-reference index to the first character after 1430 the number. 1431 1432 Returns a new PyObject representation of that number: 1433 PyInt, PyLong, or PyFloat. 
1434 May return other types if parse_int or parse_float are set 1435 */ 1436 char *str = PyString_AS_STRING(pystr); 1437 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; 1438 Py_ssize_t idx = start; 1439 int is_float = 0; 1440 PyObject *rval; 1441 PyObject *numstr; 1442 1443 /* read a sign if it's there, make sure it's not the end of the string */ 1444 if (str[idx] == '-') { 1445 idx++; 1446 if (idx > end_idx) { 1447 PyErr_SetNone(PyExc_StopIteration); 1448 return NULL; 1449 } 1450 } 1451 1452 /* read as many integer digits as we find as long as it doesn't start with 0 */ 1453 if (str[idx] >= '1' && str[idx] <= '9') { 1454 idx++; 1455 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1456 } 1457 /* if it starts with 0 we only expect one integer digit */ 1458 else if (str[idx] == '0') { 1459 idx++; 1460 } 1461 /* no integer digits, error */ 1462 else { 1463 PyErr_SetNone(PyExc_StopIteration); 1464 return NULL; 1465 } 1466 1467 /* if the next char is '.' followed by a digit then read all float digits */ 1468 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { 1469 is_float = 1; 1470 idx += 2; 1471 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1472 } 1473 1474 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ 1475 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { 1476 1477 /* save the index of the 'e' or 'E' just in case we need to backtrack */ 1478 Py_ssize_t e_start = idx; 1479 idx++; 1480 1481 /* read an exponent sign if present */ 1482 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; 1483 1484 /* read all digits */ 1485 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1486 1487 /* if we got a digit, then parse as float. 
if not, backtrack */ 1488 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { 1489 is_float = 1; 1490 } 1491 else { 1492 idx = e_start; 1493 } 1494 } 1495 1496 /* copy the section we determined to be a number */ 1497 numstr = PyString_FromStringAndSize(&str[start], idx - start); 1498 if (numstr == NULL) 1499 return NULL; 1500 if (is_float) { 1501 /* parse as a float using a fast path if available, otherwise call user defined method */ 1502 if (s->parse_float != (PyObject *)&PyFloat_Type) { 1503 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); 1504 } 1505 else { 1506 /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */ 1507 double d = PyOS_string_to_double(PyString_AS_STRING(numstr), 1508 NULL, NULL); 1509 if (d == -1.0 && PyErr_Occurred()) { /* conversion failed: release our reference to numstr before returning, the bare early return used to leak it */ Py_DECREF(numstr); 1510 return NULL; } 1511 rval = PyFloat_FromDouble(d); 1512 } 1513 } 1514 else { 1515 /* parse as an int using a fast path if available, otherwise call user defined method */ 1516 if (s->parse_int != (PyObject *)&PyInt_Type) { 1517 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); 1518 } 1519 else { 1520 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); 1521 } 1522 } 1523 Py_DECREF(numstr); 1524 *next_idx_ptr = idx; 1525 return rval; 1526 } 1527 1528 static PyObject * 1529 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { 1530 /* Read a JSON number from PyUnicode pystr. 1531 idx is the index of the first character of the number 1532 *next_idx_ptr is a return-by-reference index to the first character after 1533 the number. 1534 1535 Returns a new PyObject representation of that number: 1536 PyInt, PyLong, or PyFloat. 
1537 May return other types if parse_int or parse_float are set 1538 */ 1539 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1540 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; 1541 Py_ssize_t idx = start; 1542 int is_float = 0; 1543 PyObject *rval; 1544 PyObject *numstr; 1545 1546 /* read a sign if it's there, make sure it's not the end of the string */ 1547 if (str[idx] == '-') { 1548 idx++; 1549 if (idx > end_idx) { 1550 PyErr_SetNone(PyExc_StopIteration); 1551 return NULL; 1552 } 1553 } 1554 1555 /* read as many integer digits as we find as long as it doesn't start with 0 */ 1556 if (str[idx] >= '1' && str[idx] <= '9') { 1557 idx++; 1558 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1559 } 1560 /* if it starts with 0 we only expect one integer digit */ 1561 else if (str[idx] == '0') { 1562 idx++; 1563 } 1564 /* no integer digits, error */ 1565 else { 1566 PyErr_SetNone(PyExc_StopIteration); 1567 return NULL; 1568 } 1569 1570 /* if the next char is '.' followed by a digit then read all float digits */ 1571 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { 1572 is_float = 1; 1573 idx += 2; 1574 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1575 } 1576 1577 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ 1578 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { 1579 Py_ssize_t e_start = idx; 1580 idx++; 1581 1582 /* read an exponent sign if present */ 1583 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; 1584 1585 /* read all digits */ 1586 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1587 1588 /* if we got a digit, then parse as float. 
if not, backtrack */ 1589 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { 1590 is_float = 1; 1591 } 1592 else { 1593 idx = e_start; 1594 } 1595 } 1596 1597 /* copy the section we determined to be a number */ 1598 numstr = PyUnicode_FromUnicode(&str[start], idx - start); 1599 if (numstr == NULL) 1600 return NULL; 1601 if (is_float) { 1602 /* parse as a float using a fast path if available, otherwise call user defined method */ 1603 if (s->parse_float != (PyObject *)&PyFloat_Type) { 1604 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); 1605 } 1606 else { 1607 rval = PyFloat_FromString(numstr, NULL); 1608 } 1609 } 1610 else { 1611 /* no fast path for unicode -> int, just call */ 1612 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); 1613 } 1614 Py_DECREF(numstr); 1615 *next_idx_ptr = idx; 1616 return rval; 1617 } 1618 1619 static PyObject * 1620 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) 1621 { 1622 /* Read one JSON term (of any kind) from PyString pystr. 1623 idx is the index of the first character of the term 1624 *next_idx_ptr is a return-by-reference index to the first character after 1625 the number. 1626 1627 Returns a new PyObject representation of the term. 
1628 */ 1629 char *str = PyString_AS_STRING(pystr); 1630 Py_ssize_t length = PyString_GET_SIZE(pystr); 1631 PyObject *rval = NULL; 1632 int fallthrough = 0; 1633 if (idx >= length) { 1634 PyErr_SetNone(PyExc_StopIteration); 1635 return NULL; 1636 } 1637 if (Py_EnterRecursiveCall(" while decoding a JSON document")) 1638 return NULL; 1639 switch (str[idx]) { 1640 case '"': 1641 /* string */ 1642 rval = scanstring_str(pystr, idx + 1, 1643 PyString_AS_STRING(s->encoding), 1644 PyObject_IsTrue(s->strict), 1645 next_idx_ptr); 1646 break; 1647 case '{': 1648 /* object */ 1649 rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); 1650 break; 1651 case '[': 1652 /* array */ 1653 rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); 1654 break; 1655 case 'n': 1656 /* null */ 1657 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { 1658 Py_INCREF(Py_None); 1659 *next_idx_ptr = idx + 4; 1660 rval = Py_None; 1661 } 1662 else 1663 fallthrough = 1; 1664 break; 1665 case 't': 1666 /* true */ 1667 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { 1668 Py_INCREF(Py_True); 1669 *next_idx_ptr = idx + 4; 1670 rval = Py_True; 1671 } 1672 else 1673 fallthrough = 1; 1674 break; 1675 case 'f': 1676 /* false */ 1677 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { 1678 Py_INCREF(Py_False); 1679 *next_idx_ptr = idx + 5; 1680 rval = Py_False; 1681 } 1682 else 1683 fallthrough = 1; 1684 break; 1685 case 'N': 1686 /* NaN */ 1687 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { 1688 rval = _parse_constant(s, "NaN", idx, next_idx_ptr); 1689 } 1690 else 1691 fallthrough = 1; 1692 break; 1693 case 'I': 1694 /* Infinity */ 1695 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { 
1696 rval = _parse_constant(s, "Infinity", idx, next_idx_ptr); 1697 } 1698 else 1699 fallthrough = 1; 1700 break; 1701 case '-': 1702 /* -Infinity */ 1703 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { 1704 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); 1705 } 1706 else 1707 fallthrough = 1; 1708 break; 1709 default: 1710 fallthrough = 1; 1711 } 1712 /* Didn't find a string, object, array, or named constant. Look for a number. */ 1713 if (fallthrough) 1714 rval = _match_number_str(s, pystr, idx, next_idx_ptr); 1715 Py_LeaveRecursiveCall(); 1716 return rval; 1717 } 1718 1719 static PyObject * 1720 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) 1721 { 1722 /* Read one JSON term (of any kind) from PyUnicode pystr. 1723 idx is the index of the first character of the term 1724 *next_idx_ptr is a return-by-reference index to the first character after 1725 the number. 1726 1727 Returns a new PyObject representation of the term. 
1728 */ 1729 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1730 Py_ssize_t length = PyUnicode_GET_SIZE(pystr); 1731 PyObject *rval = NULL; 1732 int fallthrough = 0; 1733 if (idx >= length) { 1734 PyErr_SetNone(PyExc_StopIteration); 1735 return NULL; 1736 } 1737 if (Py_EnterRecursiveCall(" while decoding a JSON document")) 1738 return NULL; 1739 switch (str[idx]) { 1740 case '"': 1741 /* string */ 1742 rval = scanstring_unicode(pystr, idx + 1, 1743 PyObject_IsTrue(s->strict), 1744 next_idx_ptr); 1745 break; 1746 case '{': 1747 /* object */ 1748 rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); 1749 break; 1750 case '[': 1751 /* array */ 1752 rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); 1753 break; 1754 case 'n': 1755 /* null */ 1756 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { 1757 Py_INCREF(Py_None); 1758 *next_idx_ptr = idx + 4; 1759 rval = Py_None; 1760 } 1761 else 1762 fallthrough = 1; 1763 break; 1764 case 't': 1765 /* true */ 1766 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { 1767 Py_INCREF(Py_True); 1768 *next_idx_ptr = idx + 4; 1769 rval = Py_True; 1770 } 1771 else 1772 fallthrough = 1; 1773 break; 1774 case 'f': 1775 /* false */ 1776 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { 1777 Py_INCREF(Py_False); 1778 *next_idx_ptr = idx + 5; 1779 rval = Py_False; 1780 } 1781 else 1782 fallthrough = 1; 1783 break; 1784 case 'N': 1785 /* NaN */ 1786 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { 1787 rval = _parse_constant(s, "NaN", idx, next_idx_ptr); 1788 } 1789 else 1790 fallthrough = 1; 1791 break; 1792 case 'I': 1793 /* Infinity */ 1794 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { 1795 rval = 
_parse_constant(s, "Infinity", idx, next_idx_ptr); 1796 } 1797 else 1798 fallthrough = 1; 1799 break; 1800 case '-': 1801 /* -Infinity */ 1802 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { 1803 rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr); 1804 } 1805 else 1806 fallthrough = 1; 1807 break; 1808 default: 1809 fallthrough = 1; 1810 } 1811 /* Didn't find a string, object, array, or named constant. Look for a number. */ 1812 if (fallthrough) 1813 rval = _match_number_unicode(s, pystr, idx, next_idx_ptr); 1814 Py_LeaveRecursiveCall(); 1815 return rval; 1816 } 1817 1818 static PyObject * 1819 scanner_call(PyObject *self, PyObject *args, PyObject *kwds) 1820 { 1821 /* Python callable interface to scan_once_{str,unicode} */ 1822 PyObject *pystr; 1823 PyObject *rval; 1824 Py_ssize_t idx; 1825 Py_ssize_t next_idx = -1; 1826 static char *kwlist[] = {"string", "idx", NULL}; 1827 PyScannerObject *s; 1828 assert(PyScanner_Check(self)); 1829 s = (PyScannerObject *)self; 1830 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) 1831 return NULL; 1832 1833 if (PyString_Check(pystr)) { 1834 rval = scan_once_str(s, pystr, idx, &next_idx); 1835 } 1836 else if (PyUnicode_Check(pystr)) { 1837 rval = scan_once_unicode(s, pystr, idx, &next_idx); 1838 } 1839 else { 1840 PyErr_Format(PyExc_TypeError, 1841 "first argument must be a string, not %.80s", 1842 Py_TYPE(pystr)->tp_name); 1843 return NULL; 1844 } 1845 PyDict_Clear(s->memo); 1846 return _build_rval_index_tuple(rval, next_idx); 1847 } 1848 1849 static PyObject * 1850 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1851 { 1852 PyScannerObject *s; 1853 s = (PyScannerObject *)type->tp_alloc(type, 0); 1854 if (s != NULL) { 1855 s->encoding = NULL; 1856 s->strict = NULL; 1857 s->object_hook = 
NULL; 1858 s->pairs_hook = NULL; 1859 s->parse_float = NULL; 1860 s->parse_int = NULL; 1861 s->parse_constant = NULL; 1862 } 1863 return (PyObject *)s; 1864 } 1865 1866 static int 1867 scanner_init(PyObject *self, PyObject *args, PyObject *kwds) 1868 { 1869 /* Initialize Scanner object */ 1870 PyObject *ctx; 1871 static char *kwlist[] = {"context", NULL}; 1872 PyScannerObject *s; 1873 1874 assert(PyScanner_Check(self)); 1875 s = (PyScannerObject *)self; 1876 1877 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) 1878 return -1; 1879 1880 if (s->memo == NULL) { 1881 s->memo = PyDict_New(); 1882 if (s->memo == NULL) 1883 goto bail; 1884 } 1885 1886 /* PyString_AS_STRING is used on encoding */ 1887 s->encoding = PyObject_GetAttrString(ctx, "encoding"); 1888 if (s->encoding == NULL) 1889 goto bail; 1890 if (s->encoding == Py_None) { 1891 Py_DECREF(Py_None); 1892 s->encoding = PyString_InternFromString(DEFAULT_ENCODING); 1893 } 1894 else if (PyUnicode_Check(s->encoding)) { 1895 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); 1896 Py_DECREF(s->encoding); 1897 s->encoding = tmp; 1898 } 1899 if (s->encoding == NULL || !PyString_Check(s->encoding)) 1900 goto bail; 1901 1902 /* All of these will fail "gracefully" so we don't need to verify them */ 1903 s->strict = PyObject_GetAttrString(ctx, "strict"); 1904 if (s->strict == NULL) 1905 goto bail; 1906 s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); 1907 if (s->object_hook == NULL) 1908 goto bail; 1909 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); 1910 if (s->pairs_hook == NULL) 1911 goto bail; 1912 s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); 1913 if (s->parse_float == NULL) 1914 goto bail; 1915 s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); 1916 if (s->parse_int == NULL) 1917 goto bail; 1918 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); 1919 if (s->parse_constant == NULL) 1920 goto bail; 1921 1922 
return 0; 1923 1924 bail: 1925 Py_CLEAR(s->encoding); 1926 Py_CLEAR(s->strict); 1927 Py_CLEAR(s->object_hook); 1928 Py_CLEAR(s->pairs_hook); 1929 Py_CLEAR(s->parse_float); 1930 Py_CLEAR(s->parse_int); 1931 Py_CLEAR(s->parse_constant); 1932 return -1; 1933 } 1934 1935 PyDoc_STRVAR(scanner_doc, "JSON scanner object"); 1936 1937 static 1938 PyTypeObject PyScannerType = { 1939 PyObject_HEAD_INIT(NULL) 1940 0, /* tp_internal */ 1941 "simplejson._speedups.Scanner", /* tp_name */ 1942 sizeof(PyScannerObject), /* tp_basicsize */ 1943 0, /* tp_itemsize */ 1944 scanner_dealloc, /* tp_dealloc */ 1945 0, /* tp_print */ 1946 0, /* tp_getattr */ 1947 0, /* tp_setattr */ 1948 0, /* tp_compare */ 1949 0, /* tp_repr */ 1950 0, /* tp_as_number */ 1951 0, /* tp_as_sequence */ 1952 0, /* tp_as_mapping */ 1953 0, /* tp_hash */ 1954 scanner_call, /* tp_call */ 1955 0, /* tp_str */ 1956 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ 1957 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ 1958 0, /* tp_as_buffer */ 1959 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 1960 scanner_doc, /* tp_doc */ 1961 scanner_traverse, /* tp_traverse */ 1962 scanner_clear, /* tp_clear */ 1963 0, /* tp_richcompare */ 1964 0, /* tp_weaklistoffset */ 1965 0, /* tp_iter */ 1966 0, /* tp_iternext */ 1967 0, /* tp_methods */ 1968 scanner_members, /* tp_members */ 1969 0, /* tp_getset */ 1970 0, /* tp_base */ 1971 0, /* tp_dict */ 1972 0, /* tp_descr_get */ 1973 0, /* tp_descr_set */ 1974 0, /* tp_dictoffset */ 1975 scanner_init, /* tp_init */ 1976 0,/* PyType_GenericAlloc, */ /* tp_alloc */ 1977 scanner_new, /* tp_new */ 1978 0,/* PyObject_GC_Del, */ /* tp_free */ 1979 }; 1980 1981 static PyObject * 1982 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1983 { 1984 PyEncoderObject *s; 1985 s = (PyEncoderObject *)type->tp_alloc(type, 0); 1986 if (s != NULL) { 1987 s->markers = NULL; 1988 s->defaultfn = NULL; 1989 s->encoder = NULL; 1990 s->indent = NULL; 1991 s->key_separator = NULL; 
1992 s->item_separator = NULL; 1993 s->sort_keys = NULL; 1994 s->skipkeys = NULL; 1995 s->key_memo = NULL; 1996 } 1997 return (PyObject *)s; 1998 } 1999 2000 static int 2001 encoder_init(PyObject *self, PyObject *args, PyObject *kwds) 2002 { 2003 /* initialize Encoder object */ 2004 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", NULL}; 2005 2006 PyEncoderObject *s; 2007 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; 2008 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo, *use_decimal; 2009 2010 assert(PyEncoder_Check(self)); 2011 s = (PyEncoderObject *)self; 2012 2013 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOO:make_encoder", kwlist, 2014 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, 2015 &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal)) 2016 return -1; 2017 2018 s->markers = markers; 2019 s->defaultfn = defaultfn; 2020 s->encoder = encoder; 2021 s->indent = indent; 2022 s->key_separator = key_separator; 2023 s->item_separator = item_separator; 2024 s->sort_keys = sort_keys; 2025 s->skipkeys = skipkeys; 2026 s->key_memo = key_memo; 2027 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); 2028 s->allow_nan = PyObject_IsTrue(allow_nan); 2029 s->use_decimal = PyObject_IsTrue(use_decimal); 2030 2031 Py_INCREF(s->markers); 2032 Py_INCREF(s->defaultfn); 2033 Py_INCREF(s->encoder); 2034 Py_INCREF(s->indent); 2035 Py_INCREF(s->key_separator); 2036 Py_INCREF(s->item_separator); 2037 Py_INCREF(s->sort_keys); 2038 Py_INCREF(s->skipkeys); 2039 Py_INCREF(s->key_memo); 2040 return 0; 2041 } 2042 2043 static PyObject * 2044 encoder_call(PyObject *self, PyObject *args, PyObject *kwds) 2045 { 2046 /* Python callable interface to encode_listencode_obj */ 2047 static char *kwlist[] = {"obj", 
"_current_indent_level", NULL}; 2048 PyObject *obj; 2049 PyObject *rval; 2050 Py_ssize_t indent_level; 2051 PyEncoderObject *s; 2052 assert(PyEncoder_Check(self)); 2053 s = (PyEncoderObject *)self; 2054 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, 2055 &obj, _convertPyInt_AsSsize_t, &indent_level)) 2056 return NULL; 2057 rval = PyList_New(0); 2058 if (rval == NULL) 2059 return NULL; 2060 if (encoder_listencode_obj(s, rval, obj, indent_level)) { 2061 Py_DECREF(rval); 2062 return NULL; 2063 } 2064 return rval; 2065 } 2066 2067 static PyObject * 2068 _encoded_const(PyObject *obj) 2069 { 2070 /* Return the JSON string representation of None, True, False */ 2071 if (obj == Py_None) { 2072 static PyObject *s_null = NULL; 2073 if (s_null == NULL) { 2074 s_null = PyString_InternFromString("null"); 2075 } 2076 Py_INCREF(s_null); 2077 return s_null; 2078 } 2079 else if (obj == Py_True) { 2080 static PyObject *s_true = NULL; 2081 if (s_true == NULL) { 2082 s_true = PyString_InternFromString("true"); 2083 } 2084 Py_INCREF(s_true); 2085 return s_true; 2086 } 2087 else if (obj == Py_False) { 2088 static PyObject *s_false = NULL; 2089 if (s_false == NULL) { 2090 s_false = PyString_InternFromString("false"); 2091 } 2092 Py_INCREF(s_false); 2093 return s_false; 2094 } 2095 else { 2096 PyErr_SetString(PyExc_ValueError, "not a const"); 2097 return NULL; 2098 } 2099 } 2100 2101 static PyObject * 2102 encoder_encode_float(PyEncoderObject *s, PyObject *obj) 2103 { 2104 /* Return the JSON representation of a PyFloat */ 2105 double i = PyFloat_AS_DOUBLE(obj); 2106 if (!Py_IS_FINITE(i)) { 2107 if (!s->allow_nan) { 2108 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); 2109 return NULL; 2110 } 2111 if (i > 0) { 2112 return PyString_FromString("Infinity"); 2113 } 2114 else if (i < 0) { 2115 return PyString_FromString("-Infinity"); 2116 } 2117 else { 2118 return PyString_FromString("NaN"); 2119 } 2120 } 2121 /* Use a better float 
format here? */ 2122 return PyObject_Repr(obj); 2123 } 2124 2125 static PyObject * 2126 encoder_encode_string(PyEncoderObject *s, PyObject *obj) 2127 { 2128 /* Return the JSON representation of a string */ 2129 if (s->fast_encode) 2130 return py_encode_basestring_ascii(NULL, obj); 2131 else 2132 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); 2133 } 2134 2135 static int 2136 _steal_list_append(PyObject *lst, PyObject *stolen) 2137 { 2138 /* Append stolen and then decrement its reference count */ 2139 int rval = PyList_Append(lst, stolen); 2140 Py_DECREF(stolen); 2141 return rval; 2142 } 2143 2144 static int 2145 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) 2146 { 2147 /* Encode Python object obj to a JSON term, rval is a PyList */ 2148 int rv = -1; 2149 if (Py_EnterRecursiveCall(" while encoding a JSON document")) 2150 return rv; 2151 do { 2152 if (obj == Py_None || obj == Py_True || obj == Py_False) { 2153 PyObject *cstr = _encoded_const(obj); 2154 if (cstr != NULL) 2155 rv = _steal_list_append(rval, cstr); 2156 } 2157 else if (PyString_Check(obj) || PyUnicode_Check(obj)) 2158 { 2159 PyObject *encoded = encoder_encode_string(s, obj); 2160 if (encoded != NULL) 2161 rv = _steal_list_append(rval, encoded); 2162 } 2163 else if (PyInt_Check(obj) || PyLong_Check(obj)) { 2164 PyObject *encoded = PyObject_Str(obj); 2165 if (encoded != NULL) 2166 rv = _steal_list_append(rval, encoded); 2167 } 2168 else if (PyFloat_Check(obj)) { 2169 PyObject *encoded = encoder_encode_float(s, obj); 2170 if (encoded != NULL) 2171 rv = _steal_list_append(rval, encoded); 2172 } 2173 else if (PyList_Check(obj) || PyTuple_Check(obj)) { 2174 rv = encoder_listencode_list(s, rval, obj, indent_level); 2175 } 2176 else if (PyDict_Check(obj)) { 2177 rv = encoder_listencode_dict(s, rval, obj, indent_level); 2178 } 2179 else if (s->use_decimal && Decimal_Check(obj)) { 2180 PyObject *encoded = PyObject_Str(obj); 2181 if (encoded != NULL) 
2182 rv = _steal_list_append(rval, encoded); 2183 } 2184 else { /* unsupported type: hand obj to the user-supplied default function and encode whatever it returns, guarding against cycles through s->markers */ 2185 PyObject *ident = NULL; 2186 PyObject *newobj; 2187 if (s->markers != Py_None) { 2188 int has_key; 2189 ident = PyLong_FromVoidPtr(obj); 2190 if (ident == NULL) 2191 break; 2192 has_key = PyDict_Contains(s->markers, ident); 2193 if (has_key) { 2194 if (has_key != -1) 2195 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); 2196 Py_DECREF(ident); 2197 break; 2198 } 2199 if (PyDict_SetItem(s->markers, ident, obj)) { 2200 Py_DECREF(ident); 2201 break; 2202 } 2203 } 2204 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); 2205 if (newobj == NULL) { 2206 Py_XDECREF(ident); 2207 break; 2208 } 2209 rv = encoder_listencode_obj(s, rval, newobj, indent_level); 2210 Py_DECREF(newobj); 2211 if (rv) { 2212 Py_XDECREF(ident); 2213 rv = -1; 2214 } 2215 else if (ident != NULL) { 2216 if (PyDict_DelItem(s->markers, ident)) { 2218 rv = -1; 2219 } 2220 /* release ident exactly once, whether or not the delete succeeded; the failure branch used to decref it a second time */ Py_XDECREF(ident); 2221 } 2222 } 2223 } while (0); 2224 Py_LeaveRecursiveCall(); 2225 return rv; 2226 } 2227 2228 static int 2229 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) 2230 { 2231 /* Encode Python dict dct a JSON term, rval is a PyList */ 2232 static PyObject *open_dict = NULL; 2233 static PyObject *close_dict = NULL; 2234 static PyObject *empty_dict = NULL; 2235 static PyObject *iteritems = NULL; 2236 PyObject *kstr = NULL; 2237 PyObject *ident = NULL; 2238 PyObject *iter = NULL; 2239 PyObject *item = NULL; 2240 PyObject *items = NULL; 2241 PyObject *encoded = NULL; 2242 int skipkeys; 2243 Py_ssize_t idx; 2244 2245 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) { 2246 open_dict = PyString_InternFromString("{"); 2247 close_dict = PyString_InternFromString("}"); 2248 empty_dict = PyString_InternFromString("{}"); 2249 iteritems = PyString_InternFromString("iteritems"); 2250 if (open_dict == NULL || close_dict == NULL || 
empty_dict == NULL || iteritems == NULL) 2251 return -1; 2252 } 2253 if (PyDict_Size(dct) == 0) 2254 return PyList_Append(rval, empty_dict); 2255 2256 if (s->markers != Py_None) { 2257 int has_key; 2258 ident = PyLong_FromVoidPtr(dct); 2259 if (ident == NULL) 2260 goto bail; 2261 has_key = PyDict_Contains(s->markers, ident); 2262 if (has_key) { 2263 if (has_key != -1) 2264 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); 2265 goto bail; 2266 } 2267 if (PyDict_SetItem(s->markers, ident, dct)) { 2268 goto bail; 2269 } 2270 } 2271 2272 if (PyList_Append(rval, open_dict)) 2273 goto bail; 2274 2275 if (s->indent != Py_None) { 2276 /* TODO: DOES NOT RUN */ 2277 indent_level += 1; 2278 /* 2279 newline_indent = '\n' + (_indent * _current_indent_level) 2280 separator = _item_separator + newline_indent 2281 buf += newline_indent 2282 */ 2283 } 2284 2285 if (PyObject_IsTrue(s->sort_keys)) { 2286 /* First sort the keys then replace them with (key, value) tuples. */ 2287 Py_ssize_t i, nitems; 2288 if (PyDict_CheckExact(dct)) 2289 items = PyDict_Keys(dct); 2290 else 2291 items = PyMapping_Keys(dct); 2292 if (items == NULL) 2293 goto bail; 2294 if (!PyList_Check(items)) { 2295 PyErr_SetString(PyExc_ValueError, "keys must return list"); 2296 goto bail; 2297 } 2298 if (PyList_Sort(items) < 0) 2299 goto bail; 2300 nitems = PyList_GET_SIZE(items); 2301 for (i = 0; i < nitems; i++) { 2302 PyObject *key, *value; 2303 key = PyList_GET_ITEM(items, i); 2304 value = PyDict_GetItem(dct, key); /* PyDict_GetItem returns NULL without setting an exception when the key is absent; report it instead of handing NULL to PyTuple_Pack */ if (value == NULL) { PyErr_SetObject(PyExc_KeyError, key); goto bail; } 2305 item = PyTuple_Pack(2, key, value); 2306 if (item == NULL) 2307 goto bail; 2308 PyList_SET_ITEM(items, i, item); /* the list now owns the tuple; forget our pointer so bail cannot release it */ item = NULL; 2309 Py_DECREF(key); 2310 } 2311 } 2312 else { 2313 if (PyDict_CheckExact(dct)) 2314 items = PyDict_Items(dct); 2315 else 2316 items = PyMapping_Items(dct); 2317 } 2318 if (items == NULL) 2319 goto bail; 2320 iter = PyObject_GetIter(items); 2321 /* Py_CLEAR rather than Py_DECREF: items must be NULL afterwards, otherwise any later goto bail releases it a second time */ Py_CLEAR(items); 2322 if (iter == NULL) 2323 goto bail; 2324 2325 skipkeys = PyObject_IsTrue(s->skipkeys); 2326 idx = 0; 2327 
while ((item = PyIter_Next(iter))) { 2328 /* encoded is deliberately NOT redeclared here: it must be the function-level variable so the bail path can release it */ PyObject *key, *value; 2329 if (!PyTuple_Check(item) || Py_SIZE(item) != 2) { 2330 PyErr_SetString(PyExc_ValueError, "items must return 2-tuples"); 2331 goto bail; 2332 } 2333 key = PyTuple_GET_ITEM(item, 0); 2334 if (key == NULL) 2335 goto bail; 2336 value = PyTuple_GET_ITEM(item, 1); 2337 if (value == NULL) 2338 goto bail; 2339 2340 encoded = PyDict_GetItem(s->key_memo, key); 2341 if (encoded != NULL) { 2342 Py_INCREF(encoded); 2343 } 2344 else if (PyString_Check(key) || PyUnicode_Check(key)) { 2345 Py_INCREF(key); 2346 kstr = key; 2347 } 2348 else if (PyFloat_Check(key)) { 2349 kstr = encoder_encode_float(s, key); 2350 if (kstr == NULL) 2351 goto bail; 2352 } 2353 else if (key == Py_True || key == Py_False || key == Py_None) { 2354 /* This must come before the PyInt_Check because 2355 True and False are also 1 and 0.*/ 2356 kstr = _encoded_const(key); 2357 if (kstr == NULL) 2358 goto bail; 2359 } 2360 else if (PyInt_Check(key) || PyLong_Check(key)) { 2361 kstr = PyObject_Str(key); 2362 if (kstr == NULL) 2363 goto bail; 2364 } 2365 else if (skipkeys) { 2366 Py_DECREF(item); 2367 continue; 2368 } 2369 else { 2370 /* TODO: include repr of key */ 2371 PyErr_SetString(PyExc_TypeError, "keys must be a string"); 2372 goto bail; 2373 } 2374 2375 if (idx) { 2376 if (PyList_Append(rval, s->item_separator)) 2377 goto bail; 2378 } 2379 2380 if (encoded == NULL) { 2381 encoded = encoder_encode_string(s, kstr); 2382 Py_CLEAR(kstr); 2383 if (encoded == NULL) 2384 goto bail; 2385 if (PyDict_SetItem(s->key_memo, key, encoded)) 2386 goto bail; 2387 } 2388 if (PyList_Append(rval, encoded)) { 2389 goto bail; 2390 } 2391 Py_CLEAR(encoded); 2392 if (PyList_Append(rval, s->key_separator)) 2393 goto bail; 2394 if (encoder_listencode_obj(s, rval, value, indent_level)) 2395 goto bail; 2396 Py_CLEAR(item); 2397 idx += 1; 2398 } 2399 Py_CLEAR(iter); 2400 if (PyErr_Occurred()) 2401 goto bail; 2402 if (ident != NULL) { 2403 if 
(PyDict_DelItem(s->markers, ident)) 2404 goto bail; 2405 Py_CLEAR(ident); 2406 } 2407 if (s->indent != Py_None) { 2408 /* TODO: DOES NOT RUN */ 2409 indent_level -= 1; 2410 /* 2411 yield '\n' + (_indent * _current_indent_level) 2412 */ 2413 } 2414 if (PyList_Append(rval, close_dict)) 2415 goto bail; 2416 return 0; 2417 2418 bail: /* every pointer released here is either NULL or a reference this function still owns */ 2419 Py_XDECREF(encoded); 2420 Py_XDECREF(items); 2421 Py_XDECREF(iter); 2422 Py_XDECREF(kstr); 2423 Py_XDECREF(ident); /* the (key, value) tuple of the iteration that failed, if any */ Py_XDECREF(item); 2424 return -1; 2425 } 2426 2427 2428 static int 2429 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) 2430 { 2431 /* Encode Python list seq to a JSON term, rval is a PyList */ 2432 static PyObject *open_array = NULL; 2433 static PyObject *close_array = NULL; 2434 static PyObject *empty_array = NULL; 2435 PyObject *ident = NULL; 2436 PyObject *iter = NULL; 2437 PyObject *obj = NULL; 2438 int is_true; 2439 int i = 0; 2440 2441 if (open_array == NULL || close_array == NULL || empty_array == NULL) { 2442 open_array = PyString_InternFromString("["); 2443 close_array = PyString_InternFromString("]"); 2444 empty_array = PyString_InternFromString("[]"); 2445 if (open_array == NULL || close_array == NULL || empty_array == NULL) 2446 return -1; 2447 } 2448 ident = NULL; 2449 is_true = PyObject_IsTrue(seq); 2450 if (is_true == -1) 2451 return -1; 2452 else if (is_true == 0) 2453 return PyList_Append(rval, empty_array); 2454 2455 if (s->markers != Py_None) { 2456 int has_key; 2457 ident = PyLong_FromVoidPtr(seq); 2458 if (ident == NULL) 2459 goto bail; 2460 has_key = PyDict_Contains(s->markers, ident); 2461 if (has_key) { 2462 if (has_key != -1) 2463 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); 2464 goto bail; 2465 } 2466 if (PyDict_SetItem(s->markers, ident, seq)) { 2467 goto bail; 2468 } 2469 } 2470 2471 iter = PyObject_GetIter(seq); 2472 if (iter == NULL) 2473 goto bail; 2474 2475 if (PyList_Append(rval, open_array)) 2476 goto bail; 2477 if (s->indent != 
Py_None) { 2478 /* TODO: DOES NOT RUN */ 2479 indent_level += 1; 2480 /* 2481 newline_indent = '\n' + (_indent * _current_indent_level) 2482 separator = _item_separator + newline_indent 2483 buf += newline_indent 2484 */ 2485 } 2486 while ((obj = PyIter_Next(iter))) { 2487 if (i) { 2488 if (PyList_Append(rval, s->item_separator)) 2489 goto bail; 2490 } 2491 if (encoder_listencode_obj(s, rval, obj, indent_level)) 2492 goto bail; 2493 i++; 2494 Py_CLEAR(obj); 2495 } 2496 Py_CLEAR(iter); 2497 if (PyErr_Occurred()) 2498 goto bail; 2499 if (ident != NULL) { 2500 if (PyDict_DelItem(s->markers, ident)) 2501 goto bail; 2502 Py_CLEAR(ident); 2503 } 2504 if (s->indent != Py_None) { 2505 /* TODO: DOES NOT RUN */ 2506 indent_level -= 1; 2507 /* 2508 yield '\n' + (_indent * _current_indent_level) 2509 */ 2510 } 2511 if (PyList_Append(rval, close_array)) 2512 goto bail; 2513 return 0; 2514 2515 bail: 2516 Py_XDECREF(obj); 2517 Py_XDECREF(iter); 2518 Py_XDECREF(ident); 2519 return -1; 2520 } 2521 2522 static void 2523 encoder_dealloc(PyObject *self) 2524 { 2525 /* Deallocate Encoder */ 2526 encoder_clear(self); 2527 Py_TYPE(self)->tp_free(self); 2528 } 2529 2530 static int 2531 encoder_traverse(PyObject *self, visitproc visit, void *arg) 2532 { 2533 PyEncoderObject *s; 2534 assert(PyEncoder_Check(self)); 2535 s = (PyEncoderObject *)self; 2536 Py_VISIT(s->markers); 2537 Py_VISIT(s->defaultfn); 2538 Py_VISIT(s->encoder); 2539 Py_VISIT(s->indent); 2540 Py_VISIT(s->key_separator); 2541 Py_VISIT(s->item_separator); 2542 Py_VISIT(s->sort_keys); 2543 Py_VISIT(s->skipkeys); 2544 Py_VISIT(s->key_memo); 2545 return 0; 2546 } 2547 2548 static int 2549 encoder_clear(PyObject *self) 2550 { 2551 /* Deallocate Encoder */ 2552 PyEncoderObject *s; 2553 assert(PyEncoder_Check(self)); 2554 s = (PyEncoderObject *)self; 2555 Py_CLEAR(s->markers); 2556 Py_CLEAR(s->defaultfn); 2557 Py_CLEAR(s->encoder); 2558 Py_CLEAR(s->indent); 2559 Py_CLEAR(s->key_separator); 2560 Py_CLEAR(s->item_separator); 2561 
Py_CLEAR(s->sort_keys); 2562 Py_CLEAR(s->skipkeys); 2563 Py_CLEAR(s->key_memo); 2564 return 0; 2565 } 2566 2567 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); 2568 2569 static 2570 PyTypeObject PyEncoderType = { 2571 PyObject_HEAD_INIT(NULL) 2572 0, /* tp_internal */ 2573 "simplejson._speedups.Encoder", /* tp_name */ 2574 sizeof(PyEncoderObject), /* tp_basicsize */ 2575 0, /* tp_itemsize */ 2576 encoder_dealloc, /* tp_dealloc */ 2577 0, /* tp_print */ 2578 0, /* tp_getattr */ 2579 0, /* tp_setattr */ 2580 0, /* tp_compare */ 2581 0, /* tp_repr */ 2582 0, /* tp_as_number */ 2583 0, /* tp_as_sequence */ 2584 0, /* tp_as_mapping */ 2585 0, /* tp_hash */ 2586 encoder_call, /* tp_call */ 2587 0, /* tp_str */ 2588 0, /* tp_getattro */ 2589 0, /* tp_setattro */ 2590 0, /* tp_as_buffer */ 2591 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 2592 encoder_doc, /* tp_doc */ 2593 encoder_traverse, /* tp_traverse */ 2594 encoder_clear, /* tp_clear */ 2595 0, /* tp_richcompare */ 2596 0, /* tp_weaklistoffset */ 2597 0, /* tp_iter */ 2598 0, /* tp_iternext */ 2599 0, /* tp_methods */ 2600 encoder_members, /* tp_members */ 2601 0, /* tp_getset */ 2602 0, /* tp_base */ 2603 0, /* tp_dict */ 2604 0, /* tp_descr_get */ 2605 0, /* tp_descr_set */ 2606 0, /* tp_dictoffset */ 2607 encoder_init, /* tp_init */ 2608 0, /* tp_alloc */ 2609 encoder_new, /* tp_new */ 2610 0, /* tp_free */ 2611 }; 2612 204 2613 static PyMethodDef speedups_methods[] = { 205 DEFN(encode_basestring_ascii, METH_O), 206 {} 2614 {"encode_basestring_ascii", 2615 (PyCFunction)py_encode_basestring_ascii, 2616 METH_O, 2617 pydoc_encode_basestring_ascii}, 2618 {"scanstring", 2619 (PyCFunction)py_scanstring, 2620 METH_VARARGS, 2621 pydoc_scanstring}, 2622 {NULL, NULL, 0, NULL} 207 2623 }; 208 #undef DEFN 2624 2625 PyDoc_STRVAR(module_doc, 2626 "simplejson speedups\n"); 209 2627 210 2628 void 211 2629 init_speedups(void) 212 2630 { 213 PyObject *m; 214 m = 
Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION); 215 } 2631 PyObject *m, *decimal; 2632 PyScannerType.tp_new = PyType_GenericNew; 2633 if (PyType_Ready(&PyScannerType) < 0) 2634 return; 2635 PyEncoderType.tp_new = PyType_GenericNew; 2636 if (PyType_Ready(&PyEncoderType) < 0) 2637 return; 2638 2639 decimal = PyImport_ImportModule("decimal"); 2640 if (decimal == NULL) 2641 return; 2642 DecimalTypePtr = (PyTypeObject*)PyObject_GetAttrString(decimal, "Decimal"); 2643 Py_DECREF(decimal); 2644 if (DecimalTypePtr == NULL) 2645 return; 2646 2647 m = Py_InitModule3("_speedups", speedups_methods, module_doc); 2648 Py_INCREF((PyObject*)&PyScannerType); 2649 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); 2650 Py_INCREF((PyObject*)&PyEncoderType); 2651 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); 2652 } -
trunk/Tools/Scripts/webkitpy/thirdparty/simplejson/decoder.py
r65360 r91167 1 """ 2 Implementation of JSONDecoder 1 """Implementation of JSONDecoder 3 2 """ 4 3 import re 5 6 from scanner import Scanner, pattern 4 import sys 5 import struct 6 7 from scanner import make_scanner 8 def _import_c_scanstring(): 9 try: 10 from simplejson._speedups import scanstring 11 return scanstring 12 except ImportError: 13 return None 14 c_scanstring = _import_c_scanstring() 15 16 __all__ = ['JSONDecoder'] 7 17 8 18 FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL 9 19 10 20 def _floatconstants(): 11 import struct12 import sys13 21 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex') 22 # The struct module in Python 2.4 would get frexp() out of range here 23 # when an endian is specified in the format string. Fixed in Python 2.5+ 14 24 if sys.byteorder != 'big': 15 25 _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1] … … 18 28 19 29 NaN, PosInf, NegInf = _floatconstants() 30 31 32 class JSONDecodeError(ValueError): 33 """Subclass of ValueError with the following additional properties: 34 35 msg: The unformatted error message 36 doc: The JSON document being parsed 37 pos: The start index of doc where parsing failed 38 end: The end index of doc where parsing failed (may be None) 39 lineno: The line corresponding to pos 40 colno: The column corresponding to pos 41 endlineno: The line corresponding to end (may be None) 42 endcolno: The column corresponding to end (may be None) 43 44 """ 45 def __init__(self, msg, doc, pos, end=None): 46 ValueError.__init__(self, errmsg(msg, doc, pos, end=end)) 47 self.msg = msg 48 self.doc = doc 49 self.pos = pos 50 self.end = end 51 self.lineno, self.colno = linecol(doc, pos) 52 if end is not None: 53 self.endlineno, self.endcolno = linecol(doc, end) 54 else: 55 self.endlineno, self.endcolno = None, None 56 20 57 21 58 def linecol(doc, pos): … … 27 64 return lineno, colno 28 65 66 29 67 def errmsg(msg, doc, pos, end=None): 68 # Note that this function is called from _speedups 30 69 lineno, colno = linecol(doc, pos) 
31 70 if end is None: 32 return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos) 71 #fmt = '{0}: line {1} column {2} (char {3})' 72 #return fmt.format(msg, lineno, colno, pos) 73 fmt = '%s: line %d column %d (char %d)' 74 return fmt % (msg, lineno, colno, pos) 33 75 endlineno, endcolno = linecol(doc, end) 34 return '%s: line %d column %d - line %d column %d (char %d - %d)' % ( 35 msg, lineno, colno, endlineno, endcolno, pos, end) 76 #fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})' 77 #return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end) 78 fmt = '%s: line %d column %d - line %d column %d (char %d - %d)' 79 return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end) 80 36 81 37 82 _CONSTANTS = { … … 39 84 'Infinity': PosInf, 40 85 'NaN': NaN, 41 'true': True,42 'false': False,43 'null': None,44 86 } 45 87 46 def JSONConstant(match, context, c=_CONSTANTS): 47 return c[match.group(0)], None 48 pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant) 49 50 def JSONNumber(match, context): 51 match = JSONNumber.regex.match(match.string, *match.span()) 52 integer, frac, exp = match.groups() 53 if frac or exp: 54 res = float(integer + (frac or '') + (exp or '')) 55 else: 56 res = int(integer) 57 return res, None 58 pattern(r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?')(JSONNumber) 59 60 STRINGCHUNK = re.compile(r'(.*?)(["\\])', FLAGS) 88 STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS) 61 89 BACKSLASH = { 62 90 '"': u'"', '\\': u'\\', '/': u'/', … … 66 94 DEFAULT_ENCODING = "utf-8" 67 95 68 def scanstring(s, end, encoding=None, _b=BACKSLASH, _m=STRINGCHUNK.match): 96 def py_scanstring(s, end, encoding=None, strict=True, 97 _b=BACKSLASH, _m=STRINGCHUNK.match): 98 """Scan the string s for a JSON string. End is the index of the 99 character in s after the quote that started the JSON string. 100 Unescapes all valid JSON string escape sequences and raises ValueError 101 on attempt to decode an invalid string. 
If strict is False then literal 102 control characters are allowed in the string. 103 104 Returns a tuple of the decoded string and the index of the character in s 105 after the end quote.""" 69 106 if encoding is None: 70 107 encoding = DEFAULT_ENCODING … … 75 112 chunk = _m(s, end) 76 113 if chunk is None: 77 raise ValueError(78 errmsg("Unterminated string starting at", s, begin))114 raise JSONDecodeError( 115 "Unterminated string starting at", s, begin) 79 116 end = chunk.end() 80 117 content, terminator = chunk.groups() 118 # Content is contains zero or more unescaped string characters 81 119 if content: 82 120 if not isinstance(content, unicode): 83 121 content = unicode(content, encoding) 84 122 _append(content) 123 # Terminator is the end of string, a literal control character, 124 # or a backslash denoting that an escape sequence follows 85 125 if terminator == '"': 86 126 break 127 elif terminator != '\\': 128 if strict: 129 msg = "Invalid control character %r at" % (terminator,) 130 #msg = "Invalid control character {0!r} at".format(terminator) 131 raise JSONDecodeError(msg, s, end) 132 else: 133 _append(terminator) 134 continue 87 135 try: 88 136 esc = s[end] 89 137 except IndexError: 90 raise ValueError( 91 errmsg("Unterminated string starting at", s, begin)) 138 raise JSONDecodeError( 139 "Unterminated string starting at", s, begin) 140 # If not a unicode escape sequence, must be in the lookup table 92 141 if esc != 'u': 93 142 try: 94 m= _b[esc]143 char = _b[esc] 95 144 except KeyError: 96 raise ValueError(97 errmsg("Invalid \\escape: %r" % (esc,), s, end))145 msg = "Invalid \\escape: " + repr(esc) 146 raise JSONDecodeError(msg, s, end) 98 147 end += 1 99 148 else: 149 # Unicode escape sequence 100 150 esc = s[end + 1:end + 5] 101 try: 102 m = unichr(int(esc, 16)) 103 if len(esc) != 4 or not esc.isalnum(): 104 raise ValueError 105 except ValueError: 106 raise ValueError(errmsg("Invalid \\uXXXX escape", s, end)) 107 end += 5 108 _append(m) 151 next_end 
= end + 5 152 if len(esc) != 4: 153 msg = "Invalid \\uXXXX escape" 154 raise JSONDecodeError(msg, s, end) 155 uni = int(esc, 16) 156 # Check for surrogate pair on UCS-4 systems 157 if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535: 158 msg = "Invalid \\uXXXX\\uXXXX surrogate pair" 159 if not s[end + 5:end + 7] == '\\u': 160 raise JSONDecodeError(msg, s, end) 161 esc2 = s[end + 7:end + 11] 162 if len(esc2) != 4: 163 raise JSONDecodeError(msg, s, end) 164 uni2 = int(esc2, 16) 165 uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00)) 166 next_end += 6 167 char = unichr(uni) 168 end = next_end 169 # Append the unescaped character 170 _append(char) 109 171 return u''.join(chunks), end 110 172 111 def JSONString(match, context): 112 encoding = getattr(context, 'encoding', None) 113 return scanstring(match.string, match.end(), encoding) 114 pattern(r'"')(JSONString) 115 116 WHITESPACE = re.compile(r'\s*', FLAGS) 117 118 def JSONObject(match, context, _w=WHITESPACE.match): 119 pairs = {} 120 s = match.string 121 end = _w(s, match.end()).end() 173 174 # Use speedup if available 175 scanstring = c_scanstring or py_scanstring 176 177 WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS) 178 WHITESPACE_STR = ' \t\n\r' 179 180 def JSONObject((s, end), encoding, strict, scan_once, object_hook, 181 object_pairs_hook, memo=None, 182 _w=WHITESPACE.match, _ws=WHITESPACE_STR): 183 # Backwards compatibility 184 if memo is None: 185 memo = {} 186 memo_get = memo.setdefault 187 pairs = [] 188 # Use a slice to prevent IndexError from being raised, the following 189 # check will raise a more specific ValueError if the string is empty 122 190 nextchar = s[end:end + 1] 123 # trivial empty object 124 if nextchar == '}': 125 return pairs, end + 1 191 # Normally we expect nextchar == '"' 126 192 if nextchar != '"': 127 raise ValueError(errmsg("Expecting property name", s, end)) 193 if nextchar in _ws: 194 end = _w(s, end).end() 195 nextchar = s[end:end + 1] 196 # Trivial empty object 197 if 
nextchar == '}': 198 if object_pairs_hook is not None: 199 result = object_pairs_hook(pairs) 200 return result, end + 1 201 pairs = {} 202 if object_hook is not None: 203 pairs = object_hook(pairs) 204 return pairs, end + 1 205 elif nextchar != '"': 206 raise JSONDecodeError("Expecting property name", s, end) 128 207 end += 1 129 encoding = getattr(context, 'encoding', None)130 iterscan = JSONScanner.iterscan131 208 while True: 132 key, end = scanstring(s, end, encoding) 133 end = _w(s, end).end() 209 key, end = scanstring(s, end, encoding, strict) 210 key = memo_get(key, key) 211 212 # To skip some function call overhead we optimize the fast paths where 213 # the JSON key separator is ": " or just ":". 134 214 if s[end:end + 1] != ':': 135 raise ValueError(errmsg("Expecting : delimiter", s, end)) 136 end = _w(s, end + 1).end() 137 try: 138 value, end = iterscan(s, idx=end, context=context).next() 215 end = _w(s, end).end() 216 if s[end:end + 1] != ':': 217 raise JSONDecodeError("Expecting : delimiter", s, end) 218 219 end += 1 220 221 try: 222 if s[end] in _ws: 223 end += 1 224 if s[end] in _ws: 225 end = _w(s, end + 1).end() 226 except IndexError: 227 pass 228 229 try: 230 value, end = scan_once(s, end) 139 231 except StopIteration: 140 raise ValueError(errmsg("Expecting object", s, end)) 141 pairs[key] = value 142 end = _w(s, end).end() 143 nextchar = s[end:end + 1] 232 raise JSONDecodeError("Expecting object", s, end) 233 pairs.append((key, value)) 234 235 try: 236 nextchar = s[end] 237 if nextchar in _ws: 238 end = _w(s, end + 1).end() 239 nextchar = s[end] 240 except IndexError: 241 nextchar = '' 144 242 end += 1 243 145 244 if nextchar == '}': 146 245 break 147 if nextchar != ',': 148 raise ValueError(errmsg("Expecting , delimiter", s, end - 1)) 149 end = _w(s, end).end() 150 nextchar = s[end:end + 1] 246 elif nextchar != ',': 247 raise JSONDecodeError("Expecting , delimiter", s, end - 1) 248 249 try: 250 nextchar = s[end] 251 if nextchar in _ws: 252 end += 
1 253 nextchar = s[end] 254 if nextchar in _ws: 255 end = _w(s, end + 1).end() 256 nextchar = s[end] 257 except IndexError: 258 nextchar = '' 259 151 260 end += 1 152 261 if nextchar != '"': 153 raise ValueError(errmsg("Expecting property name", s, end - 1)) 154 object_hook = getattr(context, 'object_hook', None) 262 raise JSONDecodeError("Expecting property name", s, end - 1) 263 264 if object_pairs_hook is not None: 265 result = object_pairs_hook(pairs) 266 return result, end 267 pairs = dict(pairs) 155 268 if object_hook is not None: 156 269 pairs = object_hook(pairs) 157 270 return pairs, end 158 pattern(r'{')(JSONObject) 159 160 def JSONArray(match, context, _w=WHITESPACE.match): 271 272 def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR): 161 273 values = [] 162 s = match.string163 end = _w(s, match.end()).end()164 # look-ahead for trivial empty array165 274 nextchar = s[end:end + 1] 275 if nextchar in _ws: 276 end = _w(s, end + 1).end() 277 nextchar = s[end:end + 1] 278 # Look-ahead for trivial empty array 166 279 if nextchar == ']': 167 280 return values, end + 1 168 iterscan = JSONScanner.iterscan281 _append = values.append 169 282 while True: 170 283 try: 171 value, end = iterscan(s, idx=end, context=context).next()284 value, end = scan_once(s, end) 172 285 except StopIteration: 173 raise ValueError(errmsg("Expecting object", s, end)) 174 values.append(value) 175 end = _w(s, end).end() 286 raise JSONDecodeError("Expecting object", s, end) 287 _append(value) 176 288 nextchar = s[end:end + 1] 289 if nextchar in _ws: 290 end = _w(s, end + 1).end() 291 nextchar = s[end:end + 1] 177 292 end += 1 178 293 if nextchar == ']': 179 294 break 180 if nextchar != ',': 181 raise ValueError(errmsg("Expecting , delimiter", s, end)) 182 end = _w(s, end).end() 295 elif nextchar != ',': 296 raise JSONDecodeError("Expecting , delimiter", s, end) 297 298 try: 299 if s[end] in _ws: 300 end += 1 301 if s[end] in _ws: 302 end = _w(s, end + 1).end() 303 
except IndexError: 304 pass 305 183 306 return values, end 184 pattern(r'\[')(JSONArray)185 186 ANYTHING = [187 JSONObject,188 JSONArray,189 JSONString,190 JSONConstant,191 JSONNumber,192 ]193 194 JSONScanner = Scanner(ANYTHING)195 307 196 308 class JSONDecoder(object): 197 """ 198 Simple JSON <http://json.org> decoder 199 200 Performs the following translations in decoding: 201 309 """Simple JSON <http://json.org> decoder 310 311 Performs the following translations in decoding by default: 312 202 313 +---------------+-------------------+ 203 314 | JSON | Python | … … 222 333 It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as 223 334 their corresponding ``float`` values, which is outside the JSON spec. 335 224 336 """ 225 337 226 _scanner = Scanner(ANYTHING) 227 __all__ = ['__init__', 'decode', 'raw_decode'] 228 229 def __init__(self, encoding=None, object_hook=None): 338 def __init__(self, encoding=None, object_hook=None, parse_float=None, 339 parse_int=None, parse_constant=None, strict=True, 340 object_pairs_hook=None): 230 341 """ 231 ``encoding`` determines the encoding used to interpret any ``str``232 objects decoded by this instance (utf-8 by default). It has no233 effect when decoding ``unicode`` objects.234 342 *encoding* determines the encoding used to interpret any 343 :class:`str` objects decoded by this instance (``'utf-8'`` by 344 default). It has no effect when decoding :class:`unicode` objects. 345 235 346 Note that currently only encodings that are a superset of ASCII work, 236 strings of other encodings should be passed in as ``unicode``.237 238 ``object_hook``, if specified, will be called with the result239 of every JSON object decoded and its return value will be used in240 place of the given ``dict``. This can be used to provide custom347 strings of other encodings should be passed in as :class:`unicode`. 
348 349 *object_hook*, if specified, will be called with the result of every 350 JSON object decoded and its return value will be used in place of the 351 given :class:`dict`. This can be used to provide custom 241 352 deserializations (e.g. to support JSON-RPC class hinting). 353 354 *object_pairs_hook* is an optional function that will be called with 355 the result of any object literal decode with an ordered list of pairs. 356 The return value of *object_pairs_hook* will be used instead of the 357 :class:`dict`. This feature can be used to implement custom decoders 358 that rely on the order that the key and value pairs are decoded (for 359 example, :func:`collections.OrderedDict` will remember the order of 360 insertion). If *object_hook* is also defined, the *object_pairs_hook* 361 takes priority. 362 363 *parse_float*, if specified, will be called with the string of every 364 JSON float to be decoded. By default, this is equivalent to 365 ``float(num_str)``. This can be used to use another datatype or parser 366 for JSON floats (e.g. :class:`decimal.Decimal`). 367 368 *parse_int*, if specified, will be called with the string of every 369 JSON int to be decoded. By default, this is equivalent to 370 ``int(num_str)``. This can be used to use another datatype or parser 371 for JSON integers (e.g. :class:`float`). 372 373 *parse_constant*, if specified, will be called with one of the 374 following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This 375 can be used to raise an exception if invalid JSON numbers are 376 encountered. 377 378 *strict* controls the parser's behavior when it encounters an 379 invalid control character in a string. The default setting of 380 ``True`` means that unescaped control characters are parse errors, if 381 ``False`` then control characters will be allowed in strings. 
382 242 383 """ 243 384 self.encoding = encoding 244 385 self.object_hook = object_hook 386 self.object_pairs_hook = object_pairs_hook 387 self.parse_float = parse_float or float 388 self.parse_int = parse_int or int 389 self.parse_constant = parse_constant or _CONSTANTS.__getitem__ 390 self.strict = strict 391 self.parse_object = JSONObject 392 self.parse_array = JSONArray 393 self.parse_string = scanstring 394 self.memo = {} 395 self.scan_once = make_scanner(self) 245 396 246 397 def decode(self, s, _w=WHITESPACE.match): 247 """ 248 Return the Python representation of ``s`` (a ``str`` or ``unicode`` 398 """Return the Python representation of ``s`` (a ``str`` or ``unicode`` 249 399 instance containing a JSON document) 400 250 401 """ 251 402 obj, end = self.raw_decode(s, idx=_w(s, 0).end()) 252 403 end = _w(s, end).end() 253 404 if end != len(s): 254 raise ValueError(errmsg("Extra data", s, end, len(s)))405 raise JSONDecodeError("Extra data", s, end, len(s)) 255 406 return obj 256 407 257 def raw_decode(self, s, **kw): 258 """ 259 Decode a JSON document from ``s`` (a ``str`` or ``unicode`` beginning 260 with a JSON document) and return a 2-tuple of the Python 408 def raw_decode(self, s, idx=0): 409 """Decode a JSON document from ``s`` (a ``str`` or ``unicode`` 410 beginning with a JSON document) and return a 2-tuple of the Python 261 411 representation and the index in ``s`` where the document ended. 262 412 263 413 This can be used to decode a JSON document from a string that may 264 414 have extraneous data at the end. 415 265 416 """ 266 kw.setdefault('context', self) 267 try: 268 obj, end = self._scanner.iterscan(s, **kw).next() 417 try: 418 obj, end = self.scan_once(s, idx) 269 419 except StopIteration: 270 raise ValueError("No JSON object could be decoded")420 raise JSONDecodeError("No JSON object could be decoded", s, idx) 271 421 return obj, end 272 273 __all__ = ['JSONDecoder'] -
trunk/Tools/Scripts/webkitpy/thirdparty/simplejson/encoder.py
r54087 r91167 1 """ 2 Implementation of JSONEncoder 1 """Implementation of JSONEncoder 3 2 """ 4 3 import re 5 try: 6 from simplejson import _speedups 7 except ImportError: 8 _speedups = None 9 10 ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]') 11 ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])') 4 from decimal import Decimal 5 6 def _import_speedups(): 7 try: 8 from simplejson import _speedups 9 return _speedups.encode_basestring_ascii, _speedups.make_encoder 10 except ImportError: 11 return None, None 12 c_encode_basestring_ascii, c_make_encoder = _import_speedups() 13 14 from decoder import PosInf 15 16 ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]') 17 ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])') 18 HAS_UTF8 = re.compile(r'[\x80-\xff]') 12 19 ESCAPE_DCT = { 13 # escape all forward slashes to prevent </script> attack14 '/': '\\/',15 20 '\\': '\\\\', 16 21 '"': '\\"', … … 22 27 } 23 28 for i in range(0x20): 29 #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i)) 24 30 ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,)) 25 31 26 # assume this produces an infinity on all machines (probably not guaranteed) 27 INFINITY = float('1e66666') 28 29 def floatstr(o, allow_nan=True): 30 # Check for specials. Note that this type of test is processor- and/or 31 # platform-specific, so do tests which don't depend on the internals. 
32 33 if o != o: 34 text = 'NaN' 35 elif o == INFINITY: 36 text = 'Infinity' 37 elif o == -INFINITY: 38 text = '-Infinity' 39 else: 40 return repr(o) 41 42 if not allow_nan: 43 raise ValueError("Out of range float values are not JSON compliant: %r" 44 % (o,)) 45 46 return text 47 32 FLOAT_REPR = repr 48 33 49 34 def encode_basestring(s): 35 """Return a JSON representation of a Python string 36 50 37 """ 51 Return a JSON representation of a Python string52 """38 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 39 s = s.decode('utf-8') 53 40 def replace(match): 54 41 return ESCAPE_DCT[match.group(0)] 55 return '"' + ESCAPE.sub(replace, s) + '"' 56 57 def encode_basestring_ascii(s): 42 return u'"' + ESCAPE.sub(replace, s) + u'"' 43 44 45 def py_encode_basestring_ascii(s): 46 """Return an ASCII-only JSON representation of a Python string 47 48 """ 49 if isinstance(s, str) and HAS_UTF8.search(s) is not None: 50 s = s.decode('utf-8') 58 51 def replace(match): 59 52 s = match.group(0) … … 63 56 n = ord(s) 64 57 if n < 0x10000: 58 #return '\\u{0:04x}'.format(n) 65 59 return '\\u%04x' % (n,) 66 60 else: … … 69 63 s1 = 0xd800 | ((n >> 10) & 0x3ff) 70 64 s2 = 0xdc00 | (n & 0x3ff) 65 #return '\\u{0:04x}\\u{1:04x}'.format(s1, s2) 71 66 return '\\u%04x\\u%04x' % (s1, s2) 72 67 return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"' 73 74 try: 75 encode_basestring_ascii = _speedups.encode_basestring_ascii 76 _need_utf8 = True 77 except AttributeError: 78 _need_utf8 = False 68 69 70 encode_basestring_ascii = ( 71 c_encode_basestring_ascii or py_encode_basestring_ascii) 79 72 80 73 class JSONEncoder(object): 81 """ 82 Extensible JSON <http://json.org> encoder for Python data structures. 74 """Extensible JSON <http://json.org> encoder for Python data structures. 
83 75 84 76 Supports the following objects and types by default: 85 77 86 78 +-------------------+---------------+ 87 79 | Python | JSON | … … 106 98 object for ``o`` if possible, otherwise it should call the superclass 107 99 implementation (to raise ``TypeError``). 100 108 101 """ 109 __all__ = ['__init__', 'default', 'encode', 'iterencode']110 102 item_separator = ', ' 111 103 key_separator = ': ' 112 104 def __init__(self, skipkeys=False, ensure_ascii=True, 113 105 check_circular=True, allow_nan=True, sort_keys=False, 114 indent=None, separators=None, encoding='utf-8' ):115 """116 Constructor for JSONEncoder, with sensible defaults.117 118 If skipkeys is False, then it is a TypeError to attempt106 indent=None, separators=None, encoding='utf-8', default=None, 107 use_decimal=False): 108 """Constructor for JSONEncoder, with sensible defaults. 109 110 If skipkeys is false, then it is a TypeError to attempt 119 111 encoding of keys that are not str, int, long, float or None. If 120 112 skipkeys is True, such items are simply skipped. 121 113 122 If ensure_ascii is True, the output is guaranteed to be str114 If ensure_ascii is true, the output is guaranteed to be str 123 115 objects with all incoming unicode characters escaped. If 124 116 ensure_ascii is false, the output will be unicode object. 125 117 126 If check_circular is True, then lists, dicts, and custom encoded118 If check_circular is true, then lists, dicts, and custom encoded 127 119 objects will be checked for circular references during encoding to 128 120 prevent an infinite recursion (which would cause an OverflowError). 129 121 Otherwise, no such check takes place. 130 122 131 If allow_nan is True, then NaN, Infinity, and -Infinity will be123 If allow_nan is true, then NaN, Infinity, and -Infinity will be 132 124 encoded as such. This behavior is not JSON specification compliant, 133 125 but is consistent with most JavaScript based encoders and decoders. 
134 126 Otherwise, it will be a ValueError to encode such floats. 135 127 136 If sort_keys is True, then the output of dictionaries will be128 If sort_keys is true, then the output of dictionaries will be 137 129 sorted by key; this is useful for regression tests to ensure 138 130 that JSON serializations can be compared on a day-to-day basis. 139 131 140 If indent is a non-negative integer, then JSON array 141 elements and object members will be pretty-printed with that 142 indent level. An indent level of 0 will only insert newlines. 143 None is the most compact representation. 132 If indent is a string, then JSON array elements and object members 133 will be pretty-printed with a newline followed by that string repeated 134 for each level of nesting. ``None`` (the default) selects the most compact 135 representation without any newlines. For backwards compatibility with 136 versions of simplejson earlier than 2.1.0, an integer is also accepted 137 and is converted to a string with that many spaces. 144 138 145 139 If specified, separators should be a (item_separator, key_separator) 146 tuple. The default is (', ', ': ').To get the most compact JSON140 tuple. The default is (', ', ': '). To get the most compact JSON 147 141 representation you should specify (',', ':') to eliminate whitespace. 148 142 143 If specified, default is a function that gets called for objects 144 that can't otherwise be serialized. It should return a JSON encodable 145 version of the object or raise a ``TypeError``. 146 149 147 If encoding is not None, then all input strings will be 150 transformed into unicode using that encoding prior to JSON-encoding. 148 transformed into unicode using that encoding prior to JSON-encoding. 151 149 The default is UTF-8. 150 151 If use_decimal is true (not the default), ``decimal.Decimal`` will 152 be supported directly by the encoder. For the inverse, decode JSON 153 with ``parse_float=decimal.Decimal``. 
154 152 155 """ 153 156 … … 157 160 self.allow_nan = allow_nan 158 161 self.sort_keys = sort_keys 162 self.use_decimal = use_decimal 163 if isinstance(indent, (int, long)): 164 indent = ' ' * indent 159 165 self.indent = indent 160 self.current_indent_level = 0161 166 if separators is not None: 162 167 self.item_separator, self.key_separator = separators 168 elif indent is not None: 169 self.item_separator = ',' 170 if default is not None: 171 self.default = default 163 172 self.encoding = encoding 164 173 165 def _newline_indent(self): 166 return '\n' + (' ' * (self.indent * self.current_indent_level)) 167 168 def _iterencode_list(self, lst, markers=None): 174 def default(self, o): 175 """Implement this method in a subclass such that it returns 176 a serializable object for ``o``, or calls the base implementation 177 (to raise a ``TypeError``). 178 179 For example, to support arbitrary iterators, you could 180 implement default like this:: 181 182 def default(self, o): 183 try: 184 iterable = iter(o) 185 except TypeError: 186 pass 187 else: 188 return list(iterable) 189 return JSONEncoder.default(self, o) 190 191 """ 192 raise TypeError(repr(o) + " is not JSON serializable") 193 194 def encode(self, o): 195 """Return a JSON string representation of a Python data structure. 196 197 >>> from simplejson import JSONEncoder 198 >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) 199 '{"foo": ["bar", "baz"]}' 200 201 """ 202 # This is for extremely simple cases and benchmarks. 203 if isinstance(o, basestring): 204 if isinstance(o, str): 205 _encoding = self.encoding 206 if (_encoding is not None 207 and not (_encoding == 'utf-8')): 208 o = o.decode(_encoding) 209 if self.ensure_ascii: 210 return encode_basestring_ascii(o) 211 else: 212 return encode_basestring(o) 213 # This doesn't pass the iterator directly to ''.join() because the 214 # exceptions aren't as detailed. The list call should be roughly 215 # equivalent to the PySequence_Fast that ''.join() would do. 
216 chunks = self.iterencode(o, _one_shot=True) 217 if not isinstance(chunks, (list, tuple)): 218 chunks = list(chunks) 219 if self.ensure_ascii: 220 return ''.join(chunks) 221 else: 222 return u''.join(chunks) 223 224 def iterencode(self, o, _one_shot=False): 225 """Encode the given object and yield each string 226 representation as available. 227 228 For example:: 229 230 for chunk in JSONEncoder().iterencode(bigobject): 231 mysocket.write(chunk) 232 233 """ 234 if self.check_circular: 235 markers = {} 236 else: 237 markers = None 238 if self.ensure_ascii: 239 _encoder = encode_basestring_ascii 240 else: 241 _encoder = encode_basestring 242 if self.encoding != 'utf-8': 243 def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding): 244 if isinstance(o, str): 245 o = o.decode(_encoding) 246 return _orig_encoder(o) 247 248 def floatstr(o, allow_nan=self.allow_nan, 249 _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf): 250 # Check for specials. Note that this type of test is processor 251 # and/or platform-specific, so do tests which don't depend on 252 # the internals. 
253 254 if o != o: 255 text = 'NaN' 256 elif o == _inf: 257 text = 'Infinity' 258 elif o == _neginf: 259 text = '-Infinity' 260 else: 261 return _repr(o) 262 263 if not allow_nan: 264 raise ValueError( 265 "Out of range float values are not JSON compliant: " + 266 repr(o)) 267 268 return text 269 270 271 key_memo = {} 272 if (_one_shot and c_make_encoder is not None 273 and self.indent is None): 274 _iterencode = c_make_encoder( 275 markers, self.default, _encoder, self.indent, 276 self.key_separator, self.item_separator, self.sort_keys, 277 self.skipkeys, self.allow_nan, key_memo, self.use_decimal) 278 else: 279 _iterencode = _make_iterencode( 280 markers, self.default, _encoder, self.indent, floatstr, 281 self.key_separator, self.item_separator, self.sort_keys, 282 self.skipkeys, _one_shot, self.use_decimal) 283 try: 284 return _iterencode(o, 0) 285 finally: 286 key_memo.clear() 287 288 289 class JSONEncoderForHTML(JSONEncoder): 290 """An encoder that produces JSON safe to embed in HTML. 291 292 To embed JSON content in, say, a script tag on a web page, the 293 characters &, < and > should be escaped. They cannot be escaped 294 with the usual entities (e.g. &) because they are not expanded 295 within <script> tags. 296 """ 297 298 def encode(self, o): 299 # Override JSONEncoder.encode because it has hacks for 300 # performance that make things more complicated. 
301 chunks = self.iterencode(o, True) 302 if self.ensure_ascii: 303 return ''.join(chunks) 304 else: 305 return u''.join(chunks) 306 307 def iterencode(self, o, _one_shot=False): 308 chunks = super(JSONEncoderForHTML, self).iterencode(o, _one_shot) 309 for chunk in chunks: 310 chunk = chunk.replace('&', '\\u0026') 311 chunk = chunk.replace('<', '\\u003c') 312 chunk = chunk.replace('>', '\\u003e') 313 yield chunk 314 315 316 def _make_iterencode(markers, _default, _encoder, _indent, _floatstr, 317 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot, 318 _use_decimal, 319 ## HACK: hand-optimized bytecode; turn globals into locals 320 False=False, 321 True=True, 322 ValueError=ValueError, 323 basestring=basestring, 324 Decimal=Decimal, 325 dict=dict, 326 float=float, 327 id=id, 328 int=int, 329 isinstance=isinstance, 330 list=list, 331 long=long, 332 str=str, 333 tuple=tuple, 334 ): 335 336 def _iterencode_list(lst, _current_indent_level): 169 337 if not lst: 170 338 yield '[]' … … 175 343 raise ValueError("Circular reference detected") 176 344 markers[markerid] = lst 177 yield'['178 if self.indent is not None:179 self.current_indent_level += 1180 newline_indent = self._newline_indent()181 separator = self.item_separator + newline_indent182 yieldnewline_indent345 buf = '[' 346 if _indent is not None: 347 _current_indent_level += 1 348 newline_indent = '\n' + (_indent * _current_indent_level) 349 separator = _item_separator + newline_indent 350 buf += newline_indent 183 351 else: 184 352 newline_indent = None 185 separator = self.item_separator353 separator = _item_separator 186 354 first = True 187 355 for value in lst: … … 189 357 first = False 190 358 else: 191 yield separator 192 for chunk in self._iterencode(value, markers): 193 yield chunk 359 buf = separator 360 if isinstance(value, basestring): 361 yield buf + _encoder(value) 362 elif value is None: 363 yield buf + 'null' 364 elif value is True: 365 yield buf + 'true' 366 elif value is False: 367 
yield buf + 'false' 368 elif isinstance(value, (int, long)): 369 yield buf + str(value) 370 elif isinstance(value, float): 371 yield buf + _floatstr(value) 372 elif _use_decimal and isinstance(value, Decimal): 373 yield buf + str(value) 374 else: 375 yield buf 376 if isinstance(value, (list, tuple)): 377 chunks = _iterencode_list(value, _current_indent_level) 378 elif isinstance(value, dict): 379 chunks = _iterencode_dict(value, _current_indent_level) 380 else: 381 chunks = _iterencode(value, _current_indent_level) 382 for chunk in chunks: 383 yield chunk 194 384 if newline_indent is not None: 195 self.current_indent_level -= 1196 yield self._newline_indent()385 _current_indent_level -= 1 386 yield '\n' + (_indent * _current_indent_level) 197 387 yield ']' 198 388 if markers is not None: 199 389 del markers[markerid] 200 390 201 def _iterencode_dict( self, dct, markers=None):391 def _iterencode_dict(dct, _current_indent_level): 202 392 if not dct: 203 393 yield '{}' … … 209 399 markers[markerid] = dct 210 400 yield '{' 211 key_separator = self.key_separator 212 if self.indent is not None: 213 self.current_indent_level += 1 214 newline_indent = self._newline_indent() 215 item_separator = self.item_separator + newline_indent 401 if _indent is not None: 402 _current_indent_level += 1 403 newline_indent = '\n' + (_indent * _current_indent_level) 404 item_separator = _item_separator + newline_indent 216 405 yield newline_indent 217 406 else: 218 407 newline_indent = None 219 item_separator = self.item_separator408 item_separator = _item_separator 220 409 first = True 221 if self.ensure_ascii: 222 encoder = encode_basestring_ascii 223 else: 224 encoder = encode_basestring 225 allow_nan = self.allow_nan 226 if self.sort_keys: 227 keys = dct.keys() 228 keys.sort() 229 items = [(k, dct[k]) for k in keys] 410 if _sort_keys: 411 items = dct.items() 412 items.sort(key=lambda kv: kv[0]) 230 413 else: 231 414 items = dct.iteritems() 232 _encoding = self.encoding233 _do_decode = 
(_encoding is not None234 and not (_need_utf8 and _encoding == 'utf-8'))235 415 for key, value in items: 236 if isinstance(key, str): 237 if _do_decode: 238 key = key.decode(_encoding) 239 elif isinstance(key, basestring): 416 if isinstance(key, basestring): 240 417 pass 241 418 # JavaScript is weakly typed for these, so it makes sense to 242 419 # also allow them. Many encoders seem to do something like this. 243 420 elif isinstance(key, float): 244 key = floatstr(key, allow_nan) 245 elif isinstance(key, (int, long)): 246 key = str(key) 421 key = _floatstr(key) 247 422 elif key is True: 248 423 key = 'true' … … 251 426 elif key is None: 252 427 key = 'null' 253 elif self.skipkeys: 428 elif isinstance(key, (int, long)): 429 key = str(key) 430 elif _skipkeys: 254 431 continue 255 432 else: 256 raise TypeError("key %r is not a string" % (key,))433 raise TypeError("key " + repr(key) + " is not a string") 257 434 if first: 258 435 first = False 259 436 else: 260 437 yield item_separator 261 yield encoder(key) 262 yield key_separator 263 for chunk in self._iterencode(value, markers): 264 yield chunk 438 yield _encoder(key) 439 yield _key_separator 440 if isinstance(value, basestring): 441 yield _encoder(value) 442 elif value is None: 443 yield 'null' 444 elif value is True: 445 yield 'true' 446 elif value is False: 447 yield 'false' 448 elif isinstance(value, (int, long)): 449 yield str(value) 450 elif isinstance(value, float): 451 yield _floatstr(value) 452 elif _use_decimal and isinstance(value, Decimal): 453 yield str(value) 454 else: 455 if isinstance(value, (list, tuple)): 456 chunks = _iterencode_list(value, _current_indent_level) 457 elif isinstance(value, dict): 458 chunks = _iterencode_dict(value, _current_indent_level) 459 else: 460 chunks = _iterencode(value, _current_indent_level) 461 for chunk in chunks: 462 yield chunk 265 463 if newline_indent is not None: 266 self.current_indent_level -= 1267 yield self._newline_indent()464 _current_indent_level -= 1 465 
yield '\n' + (_indent * _current_indent_level) 268 466 yield '}' 269 467 if markers is not None: 270 468 del markers[markerid] 271 469 272 def _iterencode( self, o, markers=None):470 def _iterencode(o, _current_indent_level): 273 471 if isinstance(o, basestring): 274 if self.ensure_ascii: 275 encoder = encode_basestring_ascii 276 else: 277 encoder = encode_basestring 278 _encoding = self.encoding 279 if (_encoding is not None and isinstance(o, str) 280 and not (_need_utf8 and _encoding == 'utf-8')): 281 o = o.decode(_encoding) 282 yield encoder(o) 472 yield _encoder(o) 283 473 elif o is None: 284 474 yield 'null' … … 290 480 yield str(o) 291 481 elif isinstance(o, float): 292 yield floatstr(o, self.allow_nan)482 yield _floatstr(o) 293 483 elif isinstance(o, (list, tuple)): 294 for chunk in self._iterencode_list(o, markers):484 for chunk in _iterencode_list(o, _current_indent_level): 295 485 yield chunk 296 486 elif isinstance(o, dict): 297 for chunk in self._iterencode_dict(o, markers):487 for chunk in _iterencode_dict(o, _current_indent_level): 298 488 yield chunk 489 elif _use_decimal and isinstance(o, Decimal): 490 yield str(o) 299 491 else: 300 492 if markers is not None: … … 303 495 raise ValueError("Circular reference detected") 304 496 markers[markerid] = o 305 for chunk in self._iterencode_default(o, markers): 497 o = _default(o) 498 for chunk in _iterencode(o, _current_indent_level): 306 499 yield chunk 307 500 if markers is not None: 308 501 del markers[markerid] 309 502 310 def _iterencode_default(self, o, markers=None): 311 newobj = self.default(o) 312 return self._iterencode(newobj, markers) 313 314 def default(self, o): 315 """ 316 Implement this method in a subclass such that it returns 317 a serializable object for ``o``, or calls the base implementation 318 (to raise a ``TypeError``). 
319 320 For example, to support arbitrary iterators, you could 321 implement default like this:: 322 323 def default(self, o): 324 try: 325 iterable = iter(o) 326 except TypeError: 327 pass 328 else: 329 return list(iterable) 330 return JSONEncoder.default(self, o) 331 """ 332 raise TypeError("%r is not JSON serializable" % (o,)) 333 334 def encode(self, o): 335 """ 336 Return a JSON string representation of a Python data structure. 337 338 >>> JSONEncoder().encode({"foo": ["bar", "baz"]}) 339 '{"foo":["bar", "baz"]}' 340 """ 341 # This is for extremely simple cases and benchmarks... 342 if isinstance(o, basestring): 343 if isinstance(o, str): 344 _encoding = self.encoding 345 if (_encoding is not None 346 and not (_encoding == 'utf-8' and _need_utf8)): 347 o = o.decode(_encoding) 348 return encode_basestring_ascii(o) 349 # This doesn't pass the iterator directly to ''.join() because it 350 # sucks at reporting exceptions. It's going to do this internally 351 # anyway because it uses PySequence_Fast or similar. 352 chunks = list(self.iterencode(o)) 353 return ''.join(chunks) 354 355 def iterencode(self, o): 356 """ 357 Encode the given object and yield each string 358 representation as available. 359 360 For example:: 361 362 for chunk in JSONEncoder().iterencode(bigobject): 363 mysocket.write(chunk) 364 """ 365 if self.check_circular: 366 markers = {} 367 else: 368 markers = None 369 return self._iterencode(o, markers) 370 371 __all__ = ['JSONEncoder'] 503 return _iterencode -
trunk/Tools/Scripts/webkitpy/thirdparty/simplejson/scanner.py
r54087 r91167 1 """JSON token scanner 1 2 """ 2 Iterator based sre token scanner 3 """ 4 import sre_parse, sre_compile, sre_constants 5 from sre_constants import BRANCH, SUBPATTERN 6 from re import VERBOSE, MULTILINE, DOTALL 7 3 import re 4 def _import_c_make_scanner(): 5 try: 6 from simplejson._speedups import make_scanner 7 return make_scanner 8 except ImportError: 9 return None 10 c_make_scanner = _import_c_make_scanner() 8 11 9 __all__ = ['Scanner', 'pattern'] 12 __all__ = ['make_scanner'] 10 13 11 FLAGS = (VERBOSE | MULTILINE | DOTALL) 12 class Scanner(object): 13 def __init__(self, lexicon, flags=FLAGS): 14 self.actions = [None] 15 # combine phrases into a compound pattern 16 s = sre_parse.Pattern() 17 s.flags = flags 18 p = [] 19 for idx, token in enumerate(lexicon): 20 phrase = token.pattern 21 try: 22 subpattern = sre_parse.SubPattern(s, 23 [(SUBPATTERN, (idx + 1, sre_parse.parse(phrase, flags)))]) 24 except sre_constants.error: 25 raise 26 p.append(subpattern) 27 self.actions.append(token) 14 NUMBER_RE = re.compile( 15 r'(-?(?:0|[1-9]\d*))(\.\d+)?([eE][-+]?\d+)?', 16 (re.VERBOSE | re.MULTILINE | re.DOTALL)) 28 17 29 p = sre_parse.SubPattern(s, [(BRANCH, (None, p))]) 30 self.scanner = sre_compile.compile(p) 18 def py_make_scanner(context): 19 parse_object = context.parse_object 20 parse_array = context.parse_array 21 parse_string = context.parse_string 22 match_number = NUMBER_RE.match 23 encoding = context.encoding 24 strict = context.strict 25 parse_float = context.parse_float 26 parse_int = context.parse_int 27 parse_constant = context.parse_constant 28 object_hook = context.object_hook 29 object_pairs_hook = context.object_pairs_hook 30 memo = context.memo 31 31 32 def _scan_once(string, idx): 33 try: 34 nextchar = string[idx] 35 except IndexError: 36 raise StopIteration 32 37 33 def iterscan(self, string, idx=0, context=None): 34 """ 35 Yield match, end_idx for each match 36 """ 37 match = self.scanner.scanner(string, idx).match 38 actions = self.actions
39 lastend = idx 40 end = len(string) 41 while True: 42 m = match() 43 if m is None: 44 break 45 matchbegin, matchend = m.span() 46 if lastend == matchend: 47 break 48 action = actions[m.lastindex] 49 if action is not None: 50 rval, next_pos = action(m, context) 51 if next_pos is not None and next_pos != matchend: 52 # "fast forward" the scanner 53 matchend = next_pos 54 match = self.scanner.scanner(string, matchend).match 55 yield rval, matchend 56 lastend = matchend 57 58 def pattern(pattern, flags=FLAGS): 59 def decorator(fn): 60 fn.pattern = pattern 61 fn.regex = re.compile(pattern, flags) 62 return fn 63 return decorator 38 if nextchar == '"': 39 return parse_string(string, idx + 1, encoding, strict) 40 elif nextchar == '{': 41 return parse_object((string, idx + 1), encoding, strict, 42 _scan_once, object_hook, object_pairs_hook, memo) 43 elif nextchar == '[': 44 return parse_array((string, idx + 1), _scan_once) 45 elif nextchar == 'n' and string[idx:idx + 4] == 'null': 46 return None, idx + 4 47 elif nextchar == 't' and string[idx:idx + 4] == 'true': 48 return True, idx + 4 49 elif nextchar == 'f' and string[idx:idx + 5] == 'false': 50 return False, idx + 5 51 52 m = match_number(string, idx) 53 if m is not None: 54 integer, frac, exp = m.groups() 55 if frac or exp: 56 res = parse_float(integer + (frac or '') + (exp or '')) 57 else: 58 res = parse_int(integer) 59 return res, m.end() 60 elif nextchar == 'N' and string[idx:idx + 3] == 'NaN': 61 return parse_constant('NaN'), idx + 3 62 elif nextchar == 'I' and string[idx:idx + 8] == 'Infinity': 63 return parse_constant('Infinity'), idx + 8 64 elif nextchar == '-' and string[idx:idx + 9] == '-Infinity': 65 return parse_constant('-Infinity'), idx + 9 66 else: 67 raise StopIteration 68 69 def scan_once(string, idx): 70 try: 71 return _scan_once(string, idx) 72 finally: 73 memo.clear() 74 75 return scan_once 76 77 make_scanner = c_make_scanner or py_make_scanner -
trunk/Tools/Scripts/webkitpy/tool/servers/reflectionhandler.py
r90968 r91167 37 37 import BaseHTTPServer 38 38 39 import cgi 39 40 import codecs 40 41 import datetime … … 64 65 if "?" in self.path: 65 66 path, query_string = self.path.split("?", 1) 66 self.query = urlparse.parse_qs(query_string) 67 self.query = cgi.parse_qs(query_string) 67 68 else: 68 69 path = self.path
Note: See TracChangeset
for help on using the changeset viewer.