0001"""
0002Implementation of JSONEncoder
0003"""
0004import re
0005
0006try:
0007 from simplejson import _speedups
0008except ImportError:
0009 _speedups = None
0010
# Characters that must be escaped in any JSON string: the control characters
# U+0000-U+001F, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped when the output has to be pure ASCII:
# backslash, double quote, forward slash, and everything outside the
# printable range ' ' through '~'.
ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
# Replacement text for each escaped character.  The short two-character
# escapes are listed explicitly; every other control character gets the
# generic \uXXXX form from the loop below.  Note that '/' has no entry here,
# so callers matching it via ESCAPE_ASCII must handle the missing key.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # Keep the short escapes defined above; only fill in the gaps.
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u%04x' % (i,)
0024
0025
# Positive infinity, obtained by parsing a decimal literal far too large for
# a float.
INFINITY = float('1e66666')
# Formatter applied to ordinary (finite) floats.
FLOAT_REPR = repr


def floatstr(o, allow_nan=True):
    """
    Return the JSON text for the float ``o``.

    Finite values are formatted with ``FLOAT_REPR``.  NaN, positive
    infinity and negative infinity become ``'NaN'``, ``'Infinity'`` and
    ``'-Infinity'`` when ``allow_nan`` is true; when it is false a
    ``ValueError`` is raised for them instead.
    """
    # The special values are detected purely by comparison: NaN is the only
    # value that compares unequal to itself.
    special = None
    if o != o:
        special = 'NaN'
    elif o == INFINITY:
        special = 'Infinity'
    elif o == -INFINITY:
        special = '-Infinity'

    if special is None:
        return FLOAT_REPR(o)
    if allow_nan:
        return special
    raise ValueError("Out of range float values are not JSON compliant: %r"
        % (o,))
0047
0048
def encode_basestring(s):
    """
    Return a JSON representation of a Python string.

    Every character matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT``; all other characters are passed through unchanged and
    the result is wrapped in double quotes.
    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
0056
0057
def encode_basestring_ascii(s):
    """
    Return an ASCII-only JSON representation of a Python string.

    Characters matched by ``ESCAPE_ASCII`` are replaced by their
    ``ESCAPE_DCT`` entry when one exists, and by a ``\\uXXXX`` escape
    otherwise (this is also what happens to '/', which is matched but has
    no table entry).  Code points above U+FFFF are written as a UTF-16
    surrogate pair of two ``\\uXXXX`` escapes.
    """
    def escape_char(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u%04x' % (codepoint,)
        # Outside the Basic Multilingual Plane: split into a surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(escape_char, s)) + '"'
0074
0075
# Prefer the encode_basestring_ascii from the optional _speedups module when
# it is available.  _need_utf8 records which implementation is active; the
# encoder consults it to decide whether str input encoded as 'utf-8' still
# has to be decoded before being handed to encode_basestring_ascii.
try:
    encode_basestring_ascii = _speedups.encode_basestring_ascii
except AttributeError:
    # _speedups is None (import failed) or lacks the function: keep the
    # pure-Python implementation defined above.
    _need_utf8 = False
else:
    _need_utf8 = True
0081
0082
class JSONEncoder(object):
    """
    Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).
    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    # Default separators; an instance overrides them when ``separators`` is
    # passed to the constructor.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """
        Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, unicode, int, long, float,
        bool or None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.
        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Nesting depth used for pretty-printing.  It lives on the instance
        # and is updated while the encoding generators run.
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadow the default() method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """
        Return a newline followed by the indentation for the current
        nesting level.  Only called when ``self.indent`` is not None.
        """
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _iterencode_list(self, lst, markers=None):
        """
        Yield the JSON array text for ``lst`` chunk by chunk.

        ``markers`` is the dict of container ids currently being encoded
        (or None when circular checking is off); meeting ``lst`` again
        while it is still registered raises ``ValueError``.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            separator = self.item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                yield separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            # Step back out before emitting the closing bracket's indent.
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """
        Yield the JSON object text for ``dct`` chunk by chunk.

        Keys are coerced to strings: str/unicode keys are used as is
        (str keys are decoded with ``self.encoding`` when required),
        ``True``/``False``/``None`` become ``true``/``false``/``null``,
        floats go through ``floatstr`` and integers through ``str``.
        Any other key is skipped when ``self.skipkeys`` is true and
        raises ``TypeError`` otherwise.  ``markers`` has the same meaning
        as in ``_iterencode_list``.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        if self.indent is not None:
            self.current_indent_level += 1
            newline_indent = self._newline_indent()
            item_separator = self.item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = self.item_separator
        first = True
        if self.ensure_ascii:
            encoder = encode_basestring_ascii
        else:
            encoder = encode_basestring
        allow_nan = self.allow_nan
        if self.sort_keys:
            keys = dct.keys()
            keys.sort()
            items = [(k, dct[k]) for k in keys]
        else:
            items = dct.iteritems()
        _encoding = self.encoding
        # str keys need decoding unless the active encode_basestring_ascii
        # accepts utf-8 str directly and the configured encoding is utf-8.
        _do_decode = (_encoding is not None
            and not (_need_utf8 and _encoding == 'utf-8'))
        for key, value in items:
            if isinstance(key, str):
                if _do_decode:
                    key = key.decode(_encoding)
            elif isinstance(key, basestring):
                pass
            # The singletons must be tested before the numeric types:
            # bool is a subclass of int, so checking int first would turn
            # True/False into "True"/"False" instead of "true"/"false".
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, float):
                key = floatstr(key, allow_nan)
            elif isinstance(key, (int, long)):
                key = str(key)
            elif self.skipkeys:
                continue
            else:
                raise TypeError("key %r is not a string" % (key,))
            if first:
                first = False
            else:
                yield item_separator
            yield encoder(key)
            yield key_separator
            for chunk in self._iterencode(value, markers):
                yield chunk
        if newline_indent is not None:
            # Step back out before emitting the closing brace's indent.
            self.current_indent_level -= 1
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """
        Yield the JSON text for an arbitrary object ``o`` chunk by chunk.

        Dispatches on the type of ``o``; objects of unsupported types are
        converted through ``default()`` (with circular-reference tracking
        when ``markers`` is not None) and the result is encoded instead.
        """
        if isinstance(o, basestring):
            if self.ensure_ascii:
                encoder = encode_basestring_ascii
            else:
                encoder = encode_basestring
            _encoding = self.encoding
            if (_encoding is not None and isinstance(o, str)
                    and not (_need_utf8 and _encoding == 'utf-8')):
                o = o.decode(_encoding)
            yield encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """
        Convert ``o`` with ``default()`` and return an iterator over the
        JSON text of the converted object.
        """
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """
        Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)
        """
        raise TypeError("%r is not JSON serializable" % (o,))

    def encode(self, o):
        """
        Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'
        """
        # Fast path: a bare string is encoded directly, without going
        # through the generator machinery.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8' and _need_utf8)):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # Collect all chunks into a list first, then join them.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """
        Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)
        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
0382
# Names exported by ``from <this module> import *``: only the encoder class.
__all__ = ['JSONEncoder']