qstr.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510
  1. /*
  2. * This file is part of the MicroPython project, http://micropython.org/
  3. *
  4. * The MIT License (MIT)
  5. *
  6. * Copyright (c) 2013, 2014 Damien P. George
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #include <assert.h>
  27. #include <string.h>
  28. #include <stdio.h>
  29. #include "py/mpstate.h"
  30. #include "py/qstr.h"
  31. #include "py/gc.h"
  32. #include "py/runtime.h"
  33. #if MICROPY_DEBUG_VERBOSE // print debugging info
  34. #define DEBUG_printf DEBUG_printf
  35. #else // don't print debugging info
  36. #define DEBUG_printf(...) (void)0
  37. #endif
  38. // A qstr is an index into the qstr pool.
  39. // The data for a qstr is \0 terminated (so they can be printed using printf)
  40. #if MICROPY_QSTR_BYTES_IN_HASH
  41. #define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1)
  42. #else
  43. #define Q_HASH_MASK (0xffff)
  44. #endif
  45. #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
  46. #define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1)
  47. #define QSTR_EXIT() mp_thread_mutex_unlock(&MP_STATE_VM(qstr_mutex))
  48. #else
  49. #define QSTR_ENTER()
  50. #define QSTR_EXIT()
  51. #endif
  52. // Initial number of entries for qstr pool, set so that the first dynamically
  53. // allocated pool is twice this size. The value here must be <= MP_QSTRnumber_of.
  54. #define MICROPY_ALLOC_QSTR_ENTRIES_INIT (10)
  55. // this must match the equivalent function in makeqstrdata.py
  56. size_t qstr_compute_hash(const byte *data, size_t len) {
  57. // djb2 algorithm; see http://www.cse.yorku.ca/~oz/hash.html
  58. size_t hash = 5381;
  59. for (const byte *top = data + len; data < top; data++) {
  60. hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data
  61. }
  62. hash &= Q_HASH_MASK;
  63. // Make sure that valid hash is never zero, zero means "hash not computed"
  64. if (hash == 0) {
  65. hash++;
  66. }
  67. return hash;
  68. }
  69. // The first pool is the static qstr table. The contents must remain stable as
  70. // it is part of the .mpy ABI. See the top of py/persistentcode.c and
  71. // static_qstr_list in makeqstrdata.py. This pool is unsorted (although in a
  72. // future .mpy version we could re-order them and make it sorted). It also
  73. // contains additional qstrs that must have IDs <256, see unsorted_qstr_list
  74. // in makeqstrdata.py.
  75. #if MICROPY_QSTR_BYTES_IN_HASH
  76. const qstr_hash_t mp_qstr_const_hashes_static[] = {
  77. #ifndef NO_QSTR
  78. #define QDEF0(id, hash, len, str) hash,
  79. #define QDEF1(id, hash, len, str)
  80. #include "genhdr/qstrdefs.generated.h"
  81. #undef QDEF0
  82. #undef QDEF1
  83. #endif
  84. };
  85. #endif
  86. const qstr_len_t mp_qstr_const_lengths_static[] = {
  87. #ifndef NO_QSTR
  88. #define QDEF0(id, hash, len, str) len,
  89. #define QDEF1(id, hash, len, str)
  90. #include "genhdr/qstrdefs.generated.h"
  91. #undef QDEF0
  92. #undef QDEF1
  93. #endif
  94. };
  95. const qstr_pool_t mp_qstr_const_pool_static = {
  96. NULL, // no previous pool
  97. 0, // no previous pool
  98. false, // is_sorted
  99. MICROPY_ALLOC_QSTR_ENTRIES_INIT,
  100. MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
  101. #if MICROPY_QSTR_BYTES_IN_HASH
  102. (qstr_hash_t *)mp_qstr_const_hashes_static,
  103. #endif
  104. (qstr_len_t *)mp_qstr_const_lengths_static,
  105. {
  106. #ifndef NO_QSTR
  107. #define QDEF0(id, hash, len, str) str,
  108. #define QDEF1(id, hash, len, str)
  109. #include "genhdr/qstrdefs.generated.h"
  110. #undef QDEF0
  111. #undef QDEF1
  112. #endif
  113. },
  114. };
  115. // The next pool is the remainder of the qstrs defined in the firmware. This
  116. // is sorted.
  117. #if MICROPY_QSTR_BYTES_IN_HASH
  118. const qstr_hash_t mp_qstr_const_hashes[] = {
  119. #ifndef NO_QSTR
  120. #define QDEF0(id, hash, len, str)
  121. #define QDEF1(id, hash, len, str) hash,
  122. #include "genhdr/qstrdefs.generated.h"
  123. #undef QDEF0
  124. #undef QDEF1
  125. #endif
  126. };
  127. #endif
  128. const qstr_len_t mp_qstr_const_lengths[] = {
  129. #ifndef NO_QSTR
  130. #define QDEF0(id, hash, len, str)
  131. #define QDEF1(id, hash, len, str) len,
  132. #include "genhdr/qstrdefs.generated.h"
  133. #undef QDEF0
  134. #undef QDEF1
  135. #endif
  136. };
  137. const qstr_pool_t mp_qstr_const_pool = {
  138. &mp_qstr_const_pool_static,
  139. MP_QSTRnumber_of_static,
  140. true, // is_sorted
  141. MICROPY_ALLOC_QSTR_ENTRIES_INIT,
  142. MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
  143. #if MICROPY_QSTR_BYTES_IN_HASH
  144. (qstr_hash_t *)mp_qstr_const_hashes,
  145. #endif
  146. (qstr_len_t *)mp_qstr_const_lengths,
  147. {
  148. #ifndef NO_QSTR
  149. #define QDEF0(id, hash, len, str)
  150. #define QDEF1(id, hash, len, str) str,
  151. #include "genhdr/qstrdefs.generated.h"
  152. #undef QDEF0
  153. #undef QDEF1
  154. #endif
  155. },
  156. };
  157. // If frozen code is enabled, then there is an additional, sorted, ROM pool
  158. // containing additional qstrs required by the frozen code.
  159. #ifdef MICROPY_QSTR_EXTRA_POOL
  160. extern const qstr_pool_t MICROPY_QSTR_EXTRA_POOL;
  161. #define CONST_POOL MICROPY_QSTR_EXTRA_POOL
  162. #else
  163. #define CONST_POOL mp_qstr_const_pool
  164. #endif
  165. void qstr_init(void) {
  166. MP_STATE_VM(last_pool) = (qstr_pool_t *)&CONST_POOL; // we won't modify the const_pool since it has no allocated room left
  167. MP_STATE_VM(qstr_last_chunk) = NULL;
  168. #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
  169. mp_thread_mutex_init(&MP_STATE_VM(qstr_mutex));
  170. #endif
  171. }
  172. static const qstr_pool_t *find_qstr(qstr *q) {
  173. // search pool for this qstr
  174. // total_prev_len==0 in the final pool, so the loop will always terminate
  175. const qstr_pool_t *pool = MP_STATE_VM(last_pool);
  176. while (*q < pool->total_prev_len) {
  177. pool = pool->prev;
  178. }
  179. *q -= pool->total_prev_len;
  180. assert(*q < pool->len);
  181. return pool;
  182. }
  183. // qstr_mutex must be taken while in this function
  184. static qstr qstr_add(mp_uint_t len, const char *q_ptr) {
  185. #if MICROPY_QSTR_BYTES_IN_HASH
  186. mp_uint_t hash = qstr_compute_hash((const byte *)q_ptr, len);
  187. DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr);
  188. #else
  189. DEBUG_printf("QSTR: add len=%d data=%.*s\n", len, len, q_ptr);
  190. #endif
  191. // make sure we have room in the pool for a new qstr
  192. if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
  193. size_t new_alloc = MP_STATE_VM(last_pool)->alloc * 2;
  194. #ifdef MICROPY_QSTR_EXTRA_POOL
  195. // Put a lower bound on the allocation size in case the extra qstr pool has few entries
  196. new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc);
  197. #endif
  198. mp_uint_t pool_size = sizeof(qstr_pool_t)
  199. + (sizeof(const char *)
  200. #if MICROPY_QSTR_BYTES_IN_HASH
  201. + sizeof(qstr_hash_t)
  202. #endif
  203. + sizeof(qstr_len_t)) * new_alloc;
  204. qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size);
  205. if (pool == NULL) {
  206. // Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned
  207. // at garbage collection since it's reachable from a qstr_pool_t. And the caller of
  208. // this function expects q_ptr to be stored in a qstr_pool_t so it can be reached
  209. // by the collector. If qstr_pool_t allocation failed, qstr_last_chunk needs to be
  210. // NULL'd. Otherwise it may become a dangling pointer at the next garbage collection.
  211. MP_STATE_VM(qstr_last_chunk) = NULL;
  212. QSTR_EXIT();
  213. m_malloc_fail(new_alloc);
  214. }
  215. #if MICROPY_QSTR_BYTES_IN_HASH
  216. pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc);
  217. pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc);
  218. #else
  219. pool->lengths = (qstr_len_t *)(pool->qstrs + new_alloc);
  220. #endif
  221. pool->prev = MP_STATE_VM(last_pool);
  222. pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
  223. pool->alloc = new_alloc;
  224. pool->len = 0;
  225. MP_STATE_VM(last_pool) = pool;
  226. DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
  227. }
  228. // add the new qstr
  229. mp_uint_t at = MP_STATE_VM(last_pool)->len;
  230. #if MICROPY_QSTR_BYTES_IN_HASH
  231. MP_STATE_VM(last_pool)->hashes[at] = hash;
  232. #endif
  233. MP_STATE_VM(last_pool)->lengths[at] = len;
  234. MP_STATE_VM(last_pool)->qstrs[at] = q_ptr;
  235. MP_STATE_VM(last_pool)->len++;
  236. // return id for the newly-added qstr
  237. return MP_STATE_VM(last_pool)->total_prev_len + at;
  238. }
  239. qstr qstr_find_strn(const char *str, size_t str_len) {
  240. if (str_len == 0) {
  241. // strncmp behaviour is undefined for str==NULL.
  242. return MP_QSTR_;
  243. }
  244. #if MICROPY_QSTR_BYTES_IN_HASH
  245. // work out hash of str
  246. size_t str_hash = qstr_compute_hash((const byte *)str, str_len);
  247. #endif
  248. // search pools for the data
  249. for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
  250. size_t low = 0;
  251. size_t high = pool->len - 1;
  252. // binary search inside the pool
  253. if (pool->is_sorted) {
  254. while (high - low > 1) {
  255. size_t mid = (low + high) / 2;
  256. int cmp = strncmp(str, pool->qstrs[mid], str_len);
  257. if (cmp <= 0) {
  258. high = mid;
  259. } else {
  260. low = mid;
  261. }
  262. }
  263. }
  264. // sequential search for the remaining strings
  265. for (mp_uint_t at = low; at < high + 1; at++) {
  266. if (
  267. #if MICROPY_QSTR_BYTES_IN_HASH
  268. pool->hashes[at] == str_hash &&
  269. #endif
  270. pool->lengths[at] == str_len
  271. && memcmp(pool->qstrs[at], str, str_len) == 0) {
  272. return pool->total_prev_len + at;
  273. }
  274. }
  275. }
  276. // not found; return null qstr
  277. return MP_QSTRnull;
  278. }
  279. qstr qstr_from_str(const char *str) {
  280. return qstr_from_strn(str, strlen(str));
  281. }
  282. qstr qstr_from_strn(const char *str, size_t len) {
  283. QSTR_ENTER();
  284. qstr q = qstr_find_strn(str, len);
  285. if (q == 0) {
  286. // qstr does not exist in interned pool so need to add it
  287. // check that len is not too big
  288. if (len >= (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN))) {
  289. QSTR_EXIT();
  290. mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("name too long"));
  291. }
  292. // compute number of bytes needed to intern this string
  293. size_t n_bytes = len + 1;
  294. if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
  295. // not enough room at end of previously interned string so try to grow
  296. char *new_p = m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
  297. if (new_p == NULL) {
  298. // could not grow existing memory; shrink it to fit previous
  299. (void)m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false);
  300. MP_STATE_VM(qstr_last_chunk) = NULL;
  301. } else {
  302. // could grow existing memory
  303. MP_STATE_VM(qstr_last_alloc) += n_bytes;
  304. }
  305. }
  306. if (MP_STATE_VM(qstr_last_chunk) == NULL) {
  307. // no existing memory for the interned string so allocate a new chunk
  308. size_t al = n_bytes;
  309. if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) {
  310. al = MICROPY_ALLOC_QSTR_CHUNK_INIT;
  311. }
  312. MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, al);
  313. if (MP_STATE_VM(qstr_last_chunk) == NULL) {
  314. // failed to allocate a large chunk so try with exact size
  315. MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, n_bytes);
  316. if (MP_STATE_VM(qstr_last_chunk) == NULL) {
  317. QSTR_EXIT();
  318. m_malloc_fail(n_bytes);
  319. }
  320. al = n_bytes;
  321. }
  322. MP_STATE_VM(qstr_last_alloc) = al;
  323. MP_STATE_VM(qstr_last_used) = 0;
  324. }
  325. // allocate memory from the chunk for this new interned string's data
  326. char *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
  327. MP_STATE_VM(qstr_last_used) += n_bytes;
  328. // store the interned strings' data
  329. memcpy(q_ptr, str, len);
  330. q_ptr[len] = '\0';
  331. q = qstr_add(len, q_ptr);
  332. }
  333. QSTR_EXIT();
  334. return q;
  335. }
  336. mp_uint_t qstr_hash(qstr q) {
  337. const qstr_pool_t *pool = find_qstr(&q);
  338. #if MICROPY_QSTR_BYTES_IN_HASH
  339. return pool->hashes[q];
  340. #else
  341. return qstr_compute_hash((byte *)pool->qstrs[q], pool->lengths[q]);
  342. #endif
  343. }
  344. size_t qstr_len(qstr q) {
  345. const qstr_pool_t *pool = find_qstr(&q);
  346. return pool->lengths[q];
  347. }
  348. const char *qstr_str(qstr q) {
  349. const qstr_pool_t *pool = find_qstr(&q);
  350. return pool->qstrs[q];
  351. }
  352. const byte *qstr_data(qstr q, size_t *len) {
  353. const qstr_pool_t *pool = find_qstr(&q);
  354. *len = pool->lengths[q];
  355. return (byte *)pool->qstrs[q];
  356. }
  357. void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, size_t *n_total_bytes) {
  358. QSTR_ENTER();
  359. *n_pool = 0;
  360. *n_qstr = 0;
  361. *n_str_data_bytes = 0;
  362. *n_total_bytes = 0;
  363. for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
  364. *n_pool += 1;
  365. *n_qstr += pool->len;
  366. for (qstr_len_t *l = pool->lengths, *l_top = pool->lengths + pool->len; l < l_top; l++) {
  367. *n_str_data_bytes += *l + 1;
  368. }
  369. #if MICROPY_ENABLE_GC
  370. *n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap
  371. #else
  372. *n_total_bytes += sizeof(qstr_pool_t)
  373. + (sizeof(const char *)
  374. #if MICROPY_QSTR_BYTES_IN_HASH
  375. + sizeof(qstr_hash_t)
  376. #endif
  377. + sizeof(qstr_len_t)) * pool->alloc;
  378. #endif
  379. }
  380. *n_total_bytes += *n_str_data_bytes;
  381. QSTR_EXIT();
  382. }
  383. #if MICROPY_PY_MICROPYTHON_MEM_INFO
  384. void qstr_dump_data(void) {
  385. QSTR_ENTER();
  386. for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
  387. for (const char *const *q = pool->qstrs, *const *q_top = pool->qstrs + pool->len; q < q_top; q++) {
  388. mp_printf(&mp_plat_print, "Q(%s)\n", *q);
  389. }
  390. }
  391. QSTR_EXIT();
  392. }
  393. #endif
  394. #if MICROPY_ROM_TEXT_COMPRESSION
  395. #ifdef NO_QSTR
  396. // If NO_QSTR is set, it means we're doing QSTR extraction.
  397. // So we won't yet have "genhdr/compressed.data.h"
  398. #else
  399. // Emit the compressed_string_data string.
  400. #define MP_COMPRESSED_DATA(x) static const char *compressed_string_data = x;
  401. #define MP_MATCH_COMPRESSED(a, b)
  402. #include "genhdr/compressed.data.h"
  403. #undef MP_COMPRESSED_DATA
  404. #undef MP_MATCH_COMPRESSED
  405. #endif // NO_QSTR
  406. // This implements the "common word" compression scheme (see makecompresseddata.py) where the most
  407. // common 128 words in error messages are replaced by their index into the list of common words.
  408. // The compressed string data is delimited by setting high bit in the final char of each word.
  409. // e.g. aaaa<0x80|a>bbbbbb<0x80|b>....
  410. // This method finds the n'th string.
  411. static const byte *find_uncompressed_string(uint8_t n) {
  412. const byte *c = (byte *)compressed_string_data;
  413. while (n > 0) {
  414. while ((*c & 0x80) == 0) {
  415. ++c;
  416. }
  417. ++c;
  418. --n;
  419. }
  420. return c;
  421. }
  422. // Given a compressed string in src, decompresses it into dst.
  423. // dst must be large enough (use MP_MAX_UNCOMPRESSED_TEXT_LEN+1).
  424. void mp_decompress_rom_string(byte *dst, const mp_rom_error_text_t src_chr) {
  425. // Skip past the 0xff marker.
  426. const byte *src = (byte *)src_chr + 1;
  427. // Need to add spaces around compressed words, except for the first (i.e. transition from 1<->2).
  428. // 0 = start, 1 = compressed, 2 = regular.
  429. int state = 0;
  430. while (*src) {
  431. if ((byte) * src >= 128) {
  432. if (state != 0) {
  433. *dst++ = ' ';
  434. }
  435. state = 1;
  436. // High bit set, replace with common word.
  437. const byte *word = find_uncompressed_string(*src & 0x7f);
  438. // The word is terminated by the final char having its high bit set.
  439. while ((*word & 0x80) == 0) {
  440. *dst++ = *word++;
  441. }
  442. *dst++ = (*word & 0x7f);
  443. } else {
  444. // Otherwise just copy one char.
  445. if (state == 1) {
  446. *dst++ = ' ';
  447. }
  448. state = 2;
  449. *dst++ = *src;
  450. }
  451. ++src;
  452. }
  453. // Add null-terminator.
  454. *dst = 0;
  455. }
  456. #endif // MICROPY_ROM_TEXT_COMPRESSION