modjson.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. /*
  2. * This file is part of the MicroPython project, http://micropython.org/
  3. *
  4. * The MIT License (MIT)
  5. *
  6. * Copyright (c) 2014-2019 Damien P. George
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #include <stdio.h>
  27. #include "py/objlist.h"
  28. #include "py/objstringio.h"
  29. #include "py/parsenum.h"
  30. #include "py/runtime.h"
  31. #include "py/stream.h"
  32. #if MICROPY_PY_JSON
  33. #if MICROPY_PY_JSON_SEPARATORS
  // Output target selector for mod_json_dump_helper().
  //
  // The numeric value of each mode is also the number of leading positional
  // arguments taken by the matching Python-level function: the helper skips
  // `mode` positional arguments before it parses the keyword arguments, so
  // these values must not be changed independently of that code.
  enum {
      DUMP_MODE_TO_STRING = 1, // dumps(obj)
      DUMP_MODE_TO_STREAM = 2, // dump(obj, stream)
  };
  38. static mp_obj_t mod_json_dump_helper(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args, unsigned int mode) {
  39. enum { ARG_separators };
  40. static const mp_arg_t allowed_args[] = {
  41. { MP_QSTR_separators, MP_ARG_KW_ONLY | MP_ARG_OBJ, {.u_rom_obj = MP_ROM_NONE} },
  42. };
  43. mp_arg_val_t args[MP_ARRAY_SIZE(allowed_args)];
  44. mp_arg_parse_all(n_args - mode, pos_args + mode, kw_args, MP_ARRAY_SIZE(allowed_args), allowed_args, args);
  45. mp_print_ext_t print_ext;
  46. if (args[ARG_separators].u_obj == mp_const_none) {
  47. print_ext.item_separator = ", ";
  48. print_ext.key_separator = ": ";
  49. } else {
  50. mp_obj_t *items;
  51. mp_obj_get_array_fixed_n(args[ARG_separators].u_obj, 2, &items);
  52. print_ext.item_separator = mp_obj_str_get_str(items[0]);
  53. print_ext.key_separator = mp_obj_str_get_str(items[1]);
  54. }
  55. if (mode == DUMP_MODE_TO_STRING) {
  56. // dumps(obj)
  57. vstr_t vstr;
  58. vstr_init_print(&vstr, 8, &print_ext.base);
  59. mp_obj_print_helper(&print_ext.base, pos_args[0], PRINT_JSON);
  60. return mp_obj_new_str_from_utf8_vstr(&vstr);
  61. } else {
  62. // dump(obj, stream)
  63. print_ext.base.data = MP_OBJ_TO_PTR(pos_args[1]);
  64. print_ext.base.print_strn = mp_stream_write_adaptor;
  65. mp_get_stream_raise(pos_args[1], MP_STREAM_OP_WRITE);
  66. mp_obj_print_helper(&print_ext.base, pos_args[0], PRINT_JSON);
  67. return mp_const_none;
  68. }
  69. }
  70. static mp_obj_t mod_json_dump(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
  71. return mod_json_dump_helper(n_args, pos_args, kw_args, DUMP_MODE_TO_STREAM);
  72. }
  73. static MP_DEFINE_CONST_FUN_OBJ_KW(mod_json_dump_obj, 2, mod_json_dump);
  74. static mp_obj_t mod_json_dumps(size_t n_args, const mp_obj_t *pos_args, mp_map_t *kw_args) {
  75. return mod_json_dump_helper(n_args, pos_args, kw_args, DUMP_MODE_TO_STRING);
  76. }
  77. static MP_DEFINE_CONST_FUN_OBJ_KW(mod_json_dumps_obj, 1, mod_json_dumps);
  78. #else
  79. static mp_obj_t mod_json_dump(mp_obj_t obj, mp_obj_t stream) {
  80. mp_get_stream_raise(stream, MP_STREAM_OP_WRITE);
  81. mp_print_t print = {MP_OBJ_TO_PTR(stream), mp_stream_write_adaptor};
  82. mp_obj_print_helper(&print, obj, PRINT_JSON);
  83. return mp_const_none;
  84. }
  85. static MP_DEFINE_CONST_FUN_OBJ_2(mod_json_dump_obj, mod_json_dump);
  86. static mp_obj_t mod_json_dumps(mp_obj_t obj) {
  87. vstr_t vstr;
  88. mp_print_t print;
  89. vstr_init_print(&vstr, 8, &print);
  90. mp_obj_print_helper(&print, obj, PRINT_JSON);
  91. return mp_obj_new_str_from_utf8_vstr(&vstr);
  92. }
  93. static MP_DEFINE_CONST_FUN_OBJ_1(mod_json_dumps_obj, mod_json_dumps);
  94. #endif
  // The function below implements a simple non-recursive JSON parser.
  //
  // The JSON specification is at http://www.ietf.org/rfc/rfc4627.txt
  // The parser here will parse any valid JSON and return the correct
  // corresponding Python object. It allows through a superset of JSON, since
  // it treats commas and colons as "whitespace", and doesn't care if
  // brackets/braces are correctly paired. It will raise a ValueError if the
  // input is outside its specs.
  //
  // Most of the work is parsing the primitives (null, false, true, numbers,
  // strings). It does 1 pass over the input stream. It tries to be fast and
  // small in code size, while not using more RAM than necessary.
  107. typedef struct _json_stream_t {
  108. mp_obj_t stream_obj;
  109. mp_uint_t (*read)(mp_obj_t obj, void *buf, mp_uint_t size, int *errcode);
  110. int errcode;
  111. byte cur;
  112. } json_stream_t;
  // Accessors for a json_stream_t value `s` (passed by value, not by pointer).

  // End-of-input marker: null is not allowed in a JSON stream, so 0 is safe.
  #define S_EOF (0)
  // True once the lookahead byte is the end-of-input marker.
  #define S_END(s) ((s).cur == S_EOF)
  // The current lookahead byte, without consuming it.
  #define S_CUR(s) ((s).cur)
  // Read the next byte into the lookahead and evaluate to it.
  #define S_NEXT(s) (json_stream_next(&(s)))
  117. static byte json_stream_next(json_stream_t *s) {
  118. mp_uint_t ret = s->read(s->stream_obj, &s->cur, 1, &s->errcode);
  119. if (s->errcode != 0) {
  120. mp_raise_OSError(s->errcode);
  121. }
  122. if (ret == 0) {
  123. s->cur = S_EOF;
  124. }
  125. return s->cur;
  126. }
  127. static mp_obj_t mod_json_load(mp_obj_t stream_obj) {
  128. const mp_stream_p_t *stream_p = mp_get_stream_raise(stream_obj, MP_STREAM_OP_READ);
  129. json_stream_t s = {stream_obj, stream_p->read, 0, 0};
  130. vstr_t vstr;
  131. vstr_init(&vstr, 8);
  132. mp_obj_list_t stack; // we use a list as a simple stack for nested JSON
  133. stack.len = 0;
  134. stack.items = NULL;
  135. mp_obj_t stack_top = MP_OBJ_NULL;
  136. const mp_obj_type_t *stack_top_type = NULL;
  137. mp_obj_t stack_key = MP_OBJ_NULL;
  138. S_NEXT(s);
  139. for (;;) {
  140. cont:
  141. if (S_END(s)) {
  142. break;
  143. }
  144. mp_obj_t next = MP_OBJ_NULL;
  145. bool enter = false;
  146. byte cur = S_CUR(s);
  147. S_NEXT(s);
  148. switch (cur) {
  149. case ',':
  150. case ':':
  151. case ' ':
  152. case '\t':
  153. case '\n':
  154. case '\r':
  155. goto cont;
  156. case 'n':
  157. if (S_CUR(s) == 'u' && S_NEXT(s) == 'l' && S_NEXT(s) == 'l') {
  158. S_NEXT(s);
  159. next = mp_const_none;
  160. } else {
  161. goto fail;
  162. }
  163. break;
  164. case 'f':
  165. if (S_CUR(s) == 'a' && S_NEXT(s) == 'l' && S_NEXT(s) == 's' && S_NEXT(s) == 'e') {
  166. S_NEXT(s);
  167. next = mp_const_false;
  168. } else {
  169. goto fail;
  170. }
  171. break;
  172. case 't':
  173. if (S_CUR(s) == 'r' && S_NEXT(s) == 'u' && S_NEXT(s) == 'e') {
  174. S_NEXT(s);
  175. next = mp_const_true;
  176. } else {
  177. goto fail;
  178. }
  179. break;
  180. case '"':
  181. vstr_reset(&vstr);
  182. for (; !S_END(s) && S_CUR(s) != '"';) {
  183. byte c = S_CUR(s);
  184. if (c == '\\') {
  185. c = S_NEXT(s);
  186. switch (c) {
  187. case 'b':
  188. c = 0x08;
  189. break;
  190. case 'f':
  191. c = 0x0c;
  192. break;
  193. case 'n':
  194. c = 0x0a;
  195. break;
  196. case 'r':
  197. c = 0x0d;
  198. break;
  199. case 't':
  200. c = 0x09;
  201. break;
  202. case 'u': {
  203. mp_uint_t num = 0;
  204. for (int i = 0; i < 4; i++) {
  205. c = (S_NEXT(s) | 0x20) - '0';
  206. if (c > 9) {
  207. c -= ('a' - ('9' + 1));
  208. }
  209. num = (num << 4) | c;
  210. }
  211. vstr_add_char(&vstr, num);
  212. goto str_cont;
  213. }
  214. }
  215. }
  216. vstr_add_byte(&vstr, c);
  217. str_cont:
  218. S_NEXT(s);
  219. }
  220. if (S_END(s)) {
  221. goto fail;
  222. }
  223. S_NEXT(s);
  224. next = mp_obj_new_str(vstr.buf, vstr.len);
  225. break;
  226. case '-':
  227. case '0':
  228. case '1':
  229. case '2':
  230. case '3':
  231. case '4':
  232. case '5':
  233. case '6':
  234. case '7':
  235. case '8':
  236. case '9': {
  237. bool flt = false;
  238. vstr_reset(&vstr);
  239. for (;;) {
  240. vstr_add_byte(&vstr, cur);
  241. cur = S_CUR(s);
  242. if (cur == '.' || cur == 'E' || cur == 'e') {
  243. flt = true;
  244. } else if (cur == '+' || cur == '-' || unichar_isdigit(cur)) {
  245. // pass
  246. } else {
  247. break;
  248. }
  249. S_NEXT(s);
  250. }
  251. if (flt) {
  252. next = mp_parse_num_float(vstr.buf, vstr.len, false, NULL);
  253. } else {
  254. next = mp_parse_num_integer(vstr.buf, vstr.len, 10, NULL);
  255. }
  256. break;
  257. }
  258. case '[':
  259. next = mp_obj_new_list(0, NULL);
  260. enter = true;
  261. break;
  262. case '{':
  263. next = mp_obj_new_dict(0);
  264. enter = true;
  265. break;
  266. case '}':
  267. case ']': {
  268. if (stack_top == MP_OBJ_NULL) {
  269. // no object at all
  270. goto fail;
  271. }
  272. if (stack.len == 0) {
  273. // finished; compound object
  274. goto success;
  275. }
  276. stack.len -= 1;
  277. stack_top = stack.items[stack.len];
  278. stack_top_type = mp_obj_get_type(stack_top);
  279. goto cont;
  280. }
  281. default:
  282. goto fail;
  283. }
  284. if (stack_top == MP_OBJ_NULL) {
  285. stack_top = next;
  286. stack_top_type = mp_obj_get_type(stack_top);
  287. if (!enter) {
  288. // finished; single primitive only
  289. goto success;
  290. }
  291. } else {
  292. // append to list or dict
  293. if (stack_top_type == &mp_type_list) {
  294. mp_obj_list_append(stack_top, next);
  295. } else {
  296. if (stack_key == MP_OBJ_NULL) {
  297. stack_key = next;
  298. if (enter) {
  299. goto fail;
  300. }
  301. } else {
  302. mp_obj_dict_store(stack_top, stack_key, next);
  303. stack_key = MP_OBJ_NULL;
  304. }
  305. }
  306. if (enter) {
  307. if (stack.items == NULL) {
  308. mp_obj_list_init(&stack, 1);
  309. stack.items[0] = stack_top;
  310. } else {
  311. mp_obj_list_append(MP_OBJ_FROM_PTR(&stack), stack_top);
  312. }
  313. stack_top = next;
  314. stack_top_type = mp_obj_get_type(stack_top);
  315. }
  316. }
  317. }
  318. success:
  319. // eat trailing whitespace
  320. while (unichar_isspace(S_CUR(s))) {
  321. S_NEXT(s);
  322. }
  323. if (!S_END(s)) {
  324. // unexpected chars
  325. goto fail;
  326. }
  327. if (stack_top == MP_OBJ_NULL || stack.len != 0) {
  328. // not exactly 1 object
  329. goto fail;
  330. }
  331. vstr_clear(&vstr);
  332. return stack_top;
  333. fail:
  334. mp_raise_ValueError(MP_ERROR_TEXT("syntax error in JSON"));
  335. }
  336. static MP_DEFINE_CONST_FUN_OBJ_1(mod_json_load_obj, mod_json_load);
  337. static mp_obj_t mod_json_loads(mp_obj_t obj) {
  338. mp_buffer_info_t bufinfo;
  339. mp_get_buffer_raise(obj, &bufinfo, MP_BUFFER_READ);
  340. vstr_t vstr = {bufinfo.len, bufinfo.len, (char *)bufinfo.buf, true};
  341. mp_obj_stringio_t sio = {{&mp_type_stringio}, &vstr, 0, MP_OBJ_NULL};
  342. return mod_json_load(MP_OBJ_FROM_PTR(&sio));
  343. }
  344. static MP_DEFINE_CONST_FUN_OBJ_1(mod_json_loads_obj, mod_json_loads);
  345. static const mp_rom_map_elem_t mp_module_json_globals_table[] = {
  346. { MP_ROM_QSTR(MP_QSTR___name__), MP_ROM_QSTR(MP_QSTR_json) },
  347. { MP_ROM_QSTR(MP_QSTR_dump), MP_ROM_PTR(&mod_json_dump_obj) },
  348. { MP_ROM_QSTR(MP_QSTR_dumps), MP_ROM_PTR(&mod_json_dumps_obj) },
  349. { MP_ROM_QSTR(MP_QSTR_load), MP_ROM_PTR(&mod_json_load_obj) },
  350. { MP_ROM_QSTR(MP_QSTR_loads), MP_ROM_PTR(&mod_json_loads_obj) },
  351. };
  352. static MP_DEFINE_CONST_DICT(mp_module_json_globals, mp_module_json_globals_table);
  353. const mp_obj_module_t mp_module_json = {
  354. .base = { &mp_type_module },
  355. .globals = (mp_obj_dict_t *)&mp_module_json_globals,
  356. };
  357. MP_REGISTER_EXTENSIBLE_MODULE(MP_QSTR_json, mp_module_json);
  358. #endif // MICROPY_PY_JSON