qstr.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470
  1. /*
  2. * This file is part of the MicroPython project, http://micropython.org/
  3. *
  4. * The MIT License (MIT)
  5. *
  6. * Copyright (c) 2013, 2014 Damien P. George
  7. *
  8. * Permission is hereby granted, free of charge, to any person obtaining a copy
  9. * of this software and associated documentation files (the "Software"), to deal
  10. * in the Software without restriction, including without limitation the rights
  11. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  12. * copies of the Software, and to permit persons to whom the Software is
  13. * furnished to do so, subject to the following conditions:
  14. *
  15. * The above copyright notice and this permission notice shall be included in
  16. * all copies or substantial portions of the Software.
  17. *
  18. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  21. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  23. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  24. * THE SOFTWARE.
  25. */
  26. #include <assert.h>
  27. #include <string.h>
  28. #include <stdio.h>
  29. #include "py/mpstate.h"
  30. #include "py/qstr.h"
  31. #include "py/gc.h"
  32. #include "py/runtime.h"
  33. #if MICROPY_DEBUG_VERBOSE // print debugging info
  34. #define DEBUG_printf DEBUG_printf
  35. #else // don't print debugging info
  36. #define DEBUG_printf(...) (void)0
  37. #endif
  38. // A qstr is an index into the qstr pool.
  39. // The data for a qstr is \0 terminated (so they can be printed using printf)
  40. #define Q_HASH_MASK ((1 << (8 * MICROPY_QSTR_BYTES_IN_HASH)) - 1)
  41. #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
  42. #define QSTR_ENTER() mp_thread_mutex_lock(&MP_STATE_VM(qstr_mutex), 1)
  43. #define QSTR_EXIT() mp_thread_mutex_unlock(&MP_STATE_VM(qstr_mutex))
  44. #else
  45. #define QSTR_ENTER()
  46. #define QSTR_EXIT()
  47. #endif
  48. // Initial number of entries for qstr pool, set so that the first dynamically
  49. // allocated pool is twice this size. The value here must be <= MP_QSTRnumber_of.
  50. #define MICROPY_ALLOC_QSTR_ENTRIES_INIT (10)
  51. // this must match the equivalent function in makeqstrdata.py
  52. size_t qstr_compute_hash(const byte *data, size_t len) {
  53. // djb2 algorithm; see http://www.cse.yorku.ca/~oz/hash.html
  54. size_t hash = 5381;
  55. for (const byte *top = data + len; data < top; data++) {
  56. hash = ((hash << 5) + hash) ^ (*data); // hash * 33 ^ data
  57. }
  58. hash &= Q_HASH_MASK;
  59. // Make sure that valid hash is never zero, zero means "hash not computed"
  60. if (hash == 0) {
  61. hash++;
  62. }
  63. return hash;
  64. }
  65. // The first pool is the static qstr table. The contents must remain stable as
  66. // it is part of the .mpy ABI. See the top of py/persistentcode.c and
  67. // static_qstr_list in makeqstrdata.py. This pool is unsorted (although in a
  68. // future .mpy version we could re-order them and make it sorted). It also
  69. // contains additional qstrs that must have IDs <256, see operator_qstr_list
  70. // in makeqstrdata.py.
  71. const qstr_hash_t mp_qstr_const_hashes_static[] = {
  72. #ifndef NO_QSTR
  73. #define QDEF0(id, hash, len, str) hash,
  74. #define QDEF1(id, hash, len, str)
  75. #include "genhdr/qstrdefs.generated.h"
  76. #undef QDEF0
  77. #undef QDEF1
  78. #endif
  79. };
  80. const qstr_len_t mp_qstr_const_lengths_static[] = {
  81. #ifndef NO_QSTR
  82. #define QDEF0(id, hash, len, str) len,
  83. #define QDEF1(id, hash, len, str)
  84. #include "genhdr/qstrdefs.generated.h"
  85. #undef QDEF0
  86. #undef QDEF1
  87. #endif
  88. };
  89. const qstr_pool_t mp_qstr_const_pool_static = {
  90. NULL, // no previous pool
  91. 0, // no previous pool
  92. false, // is_sorted
  93. MICROPY_ALLOC_QSTR_ENTRIES_INIT,
  94. MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
  95. (qstr_hash_t *)mp_qstr_const_hashes_static,
  96. (qstr_len_t *)mp_qstr_const_lengths_static,
  97. {
  98. #ifndef NO_QSTR
  99. #define QDEF0(id, hash, len, str) str,
  100. #define QDEF1(id, hash, len, str)
  101. #include "genhdr/qstrdefs.generated.h"
  102. #undef QDEF0
  103. #undef QDEF1
  104. #endif
  105. },
  106. };
  107. // The next pool is the remainder of the qstrs defined in the firmware. This
  108. // is sorted.
  109. const qstr_hash_t mp_qstr_const_hashes[] = {
  110. #ifndef NO_QSTR
  111. #define QDEF0(id, hash, len, str)
  112. #define QDEF1(id, hash, len, str) hash,
  113. #include "genhdr/qstrdefs.generated.h"
  114. #undef QDEF0
  115. #undef QDEF1
  116. #endif
  117. };
  118. const qstr_len_t mp_qstr_const_lengths[] = {
  119. #ifndef NO_QSTR
  120. #define QDEF0(id, hash, len, str)
  121. #define QDEF1(id, hash, len, str) len,
  122. #include "genhdr/qstrdefs.generated.h"
  123. #undef QDEF0
  124. #undef QDEF1
  125. #endif
  126. };
  127. const qstr_pool_t mp_qstr_const_pool = {
  128. &mp_qstr_const_pool_static,
  129. MP_QSTRnumber_of_static,
  130. true, // is_sorted
  131. MICROPY_ALLOC_QSTR_ENTRIES_INIT,
  132. MP_QSTRnumber_of - MP_QSTRnumber_of_static, // corresponds to number of strings in array just below
  133. (qstr_hash_t *)mp_qstr_const_hashes,
  134. (qstr_len_t *)mp_qstr_const_lengths,
  135. {
  136. #ifndef NO_QSTR
  137. #define QDEF0(id, hash, len, str)
  138. #define QDEF1(id, hash, len, str) str,
  139. #include "genhdr/qstrdefs.generated.h"
  140. #undef QDEF0
  141. #undef QDEF1
  142. #endif
  143. },
  144. };
  145. // If frozen code is enabled, then there is an additional, sorted, ROM pool
  146. // containing additional qstrs required by the frozen code.
  147. #ifdef MICROPY_QSTR_EXTRA_POOL
  148. extern const qstr_pool_t MICROPY_QSTR_EXTRA_POOL;
  149. #define CONST_POOL MICROPY_QSTR_EXTRA_POOL
  150. #else
  151. #define CONST_POOL mp_qstr_const_pool
  152. #endif
  153. void qstr_init(void) {
  154. MP_STATE_VM(last_pool) = (qstr_pool_t *)&CONST_POOL; // we won't modify the const_pool since it has no allocated room left
  155. MP_STATE_VM(qstr_last_chunk) = NULL;
  156. #if MICROPY_PY_THREAD && !MICROPY_PY_THREAD_GIL
  157. mp_thread_mutex_init(&MP_STATE_VM(qstr_mutex));
  158. #endif
  159. }
  160. STATIC const qstr_pool_t *find_qstr(qstr *q) {
  161. // search pool for this qstr
  162. // total_prev_len==0 in the final pool, so the loop will always terminate
  163. const qstr_pool_t *pool = MP_STATE_VM(last_pool);
  164. while (*q < pool->total_prev_len) {
  165. pool = pool->prev;
  166. }
  167. *q -= pool->total_prev_len;
  168. assert(*q < pool->len);
  169. return pool;
  170. }
  171. // qstr_mutex must be taken while in this function
  172. STATIC qstr qstr_add(mp_uint_t hash, mp_uint_t len, const char *q_ptr) {
  173. DEBUG_printf("QSTR: add hash=%d len=%d data=%.*s\n", hash, len, len, q_ptr);
  174. // make sure we have room in the pool for a new qstr
  175. if (MP_STATE_VM(last_pool)->len >= MP_STATE_VM(last_pool)->alloc) {
  176. size_t new_alloc = MP_STATE_VM(last_pool)->alloc * 2;
  177. #ifdef MICROPY_QSTR_EXTRA_POOL
  178. // Put a lower bound on the allocation size in case the extra qstr pool has few entries
  179. new_alloc = MAX(MICROPY_ALLOC_QSTR_ENTRIES_INIT, new_alloc);
  180. #endif
  181. mp_uint_t pool_size = sizeof(qstr_pool_t)
  182. + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * new_alloc;
  183. qstr_pool_t *pool = (qstr_pool_t *)m_malloc_maybe(pool_size);
  184. if (pool == NULL) {
  185. // Keep qstr_last_chunk consistent with qstr_pool_t: qstr_last_chunk is not scanned
  186. // at garbage collection since it's reachable from a qstr_pool_t. And the caller of
  187. // this function expects q_ptr to be stored in a qstr_pool_t so it can be reached
  188. // by the collector. If qstr_pool_t allocation failed, qstr_last_chunk needs to be
  189. // NULL'd. Otherwise it may become a dangling pointer at the next garbage collection.
  190. MP_STATE_VM(qstr_last_chunk) = NULL;
  191. QSTR_EXIT();
  192. m_malloc_fail(new_alloc);
  193. }
  194. pool->hashes = (qstr_hash_t *)(pool->qstrs + new_alloc);
  195. pool->lengths = (qstr_len_t *)(pool->hashes + new_alloc);
  196. pool->prev = MP_STATE_VM(last_pool);
  197. pool->total_prev_len = MP_STATE_VM(last_pool)->total_prev_len + MP_STATE_VM(last_pool)->len;
  198. pool->alloc = new_alloc;
  199. pool->len = 0;
  200. MP_STATE_VM(last_pool) = pool;
  201. DEBUG_printf("QSTR: allocate new pool of size %d\n", MP_STATE_VM(last_pool)->alloc);
  202. }
  203. // add the new qstr
  204. mp_uint_t at = MP_STATE_VM(last_pool)->len;
  205. MP_STATE_VM(last_pool)->hashes[at] = hash;
  206. MP_STATE_VM(last_pool)->lengths[at] = len;
  207. MP_STATE_VM(last_pool)->qstrs[at] = q_ptr;
  208. MP_STATE_VM(last_pool)->len++;
  209. // return id for the newly-added qstr
  210. return MP_STATE_VM(last_pool)->total_prev_len + at;
  211. }
  212. qstr qstr_find_strn(const char *str, size_t str_len) {
  213. if (str_len == 0) {
  214. // strncmp behaviour is undefined for str==NULL.
  215. return MP_QSTR_;
  216. }
  217. // work out hash of str
  218. size_t str_hash = qstr_compute_hash((const byte *)str, str_len);
  219. // search pools for the data
  220. for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL; pool = pool->prev) {
  221. size_t low = 0;
  222. size_t high = pool->len - 1;
  223. // binary search inside the pool
  224. if (pool->is_sorted) {
  225. while (high - low > 1) {
  226. size_t mid = (low + high) / 2;
  227. int cmp = strncmp(str, pool->qstrs[mid], str_len);
  228. if (cmp <= 0) {
  229. high = mid;
  230. } else {
  231. low = mid;
  232. }
  233. }
  234. }
  235. // sequential search for the remaining strings
  236. for (mp_uint_t at = low; at < high + 1; at++) {
  237. if (pool->hashes[at] == str_hash && pool->lengths[at] == str_len
  238. && memcmp(pool->qstrs[at], str, str_len) == 0) {
  239. return pool->total_prev_len + at;
  240. }
  241. }
  242. }
  243. // not found; return null qstr
  244. return MP_QSTRnull;
  245. }
  246. qstr qstr_from_str(const char *str) {
  247. return qstr_from_strn(str, strlen(str));
  248. }
  249. qstr qstr_from_strn(const char *str, size_t len) {
  250. QSTR_ENTER();
  251. qstr q = qstr_find_strn(str, len);
  252. if (q == 0) {
  253. // qstr does not exist in interned pool so need to add it
  254. // check that len is not too big
  255. if (len >= (1 << (8 * MICROPY_QSTR_BYTES_IN_LEN))) {
  256. QSTR_EXIT();
  257. mp_raise_msg(&mp_type_RuntimeError, MP_ERROR_TEXT("name too long"));
  258. }
  259. // compute number of bytes needed to intern this string
  260. size_t n_bytes = len + 1;
  261. if (MP_STATE_VM(qstr_last_chunk) != NULL && MP_STATE_VM(qstr_last_used) + n_bytes > MP_STATE_VM(qstr_last_alloc)) {
  262. // not enough room at end of previously interned string so try to grow
  263. char *new_p = m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_alloc) + n_bytes, false);
  264. if (new_p == NULL) {
  265. // could not grow existing memory; shrink it to fit previous
  266. (void)m_renew_maybe(char, MP_STATE_VM(qstr_last_chunk), MP_STATE_VM(qstr_last_alloc), MP_STATE_VM(qstr_last_used), false);
  267. MP_STATE_VM(qstr_last_chunk) = NULL;
  268. } else {
  269. // could grow existing memory
  270. MP_STATE_VM(qstr_last_alloc) += n_bytes;
  271. }
  272. }
  273. if (MP_STATE_VM(qstr_last_chunk) == NULL) {
  274. // no existing memory for the interned string so allocate a new chunk
  275. size_t al = n_bytes;
  276. if (al < MICROPY_ALLOC_QSTR_CHUNK_INIT) {
  277. al = MICROPY_ALLOC_QSTR_CHUNK_INIT;
  278. }
  279. MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, al);
  280. if (MP_STATE_VM(qstr_last_chunk) == NULL) {
  281. // failed to allocate a large chunk so try with exact size
  282. MP_STATE_VM(qstr_last_chunk) = m_new_maybe(char, n_bytes);
  283. if (MP_STATE_VM(qstr_last_chunk) == NULL) {
  284. QSTR_EXIT();
  285. m_malloc_fail(n_bytes);
  286. }
  287. al = n_bytes;
  288. }
  289. MP_STATE_VM(qstr_last_alloc) = al;
  290. MP_STATE_VM(qstr_last_used) = 0;
  291. }
  292. // allocate memory from the chunk for this new interned string's data
  293. char *q_ptr = MP_STATE_VM(qstr_last_chunk) + MP_STATE_VM(qstr_last_used);
  294. MP_STATE_VM(qstr_last_used) += n_bytes;
  295. // store the interned strings' data
  296. size_t hash = qstr_compute_hash((const byte *)str, len);
  297. memcpy(q_ptr, str, len);
  298. q_ptr[len] = '\0';
  299. q = qstr_add(hash, len, q_ptr);
  300. }
  301. QSTR_EXIT();
  302. return q;
  303. }
  304. mp_uint_t qstr_hash(qstr q) {
  305. const qstr_pool_t *pool = find_qstr(&q);
  306. return pool->hashes[q];
  307. }
  308. size_t qstr_len(qstr q) {
  309. const qstr_pool_t *pool = find_qstr(&q);
  310. return pool->lengths[q];
  311. }
  312. const char *qstr_str(qstr q) {
  313. const qstr_pool_t *pool = find_qstr(&q);
  314. return pool->qstrs[q];
  315. }
  316. const byte *qstr_data(qstr q, size_t *len) {
  317. const qstr_pool_t *pool = find_qstr(&q);
  318. *len = pool->lengths[q];
  319. return (byte *)pool->qstrs[q];
  320. }
  321. void qstr_pool_info(size_t *n_pool, size_t *n_qstr, size_t *n_str_data_bytes, size_t *n_total_bytes) {
  322. QSTR_ENTER();
  323. *n_pool = 0;
  324. *n_qstr = 0;
  325. *n_str_data_bytes = 0;
  326. *n_total_bytes = 0;
  327. for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
  328. *n_pool += 1;
  329. *n_qstr += pool->len;
  330. for (qstr_len_t *l = pool->lengths, *l_top = pool->lengths + pool->len; l < l_top; l++) {
  331. *n_str_data_bytes += *l + 1;
  332. }
  333. #if MICROPY_ENABLE_GC
  334. *n_total_bytes += gc_nbytes(pool); // this counts actual bytes used in heap
  335. #else
  336. *n_total_bytes += sizeof(qstr_pool_t)
  337. + (sizeof(const char *) + sizeof(qstr_hash_t) + sizeof(qstr_len_t)) * pool->alloc;
  338. #endif
  339. }
  340. *n_total_bytes += *n_str_data_bytes;
  341. QSTR_EXIT();
  342. }
  343. #if MICROPY_PY_MICROPYTHON_MEM_INFO
  344. void qstr_dump_data(void) {
  345. QSTR_ENTER();
  346. for (const qstr_pool_t *pool = MP_STATE_VM(last_pool); pool != NULL && pool != &CONST_POOL; pool = pool->prev) {
  347. for (const char *const *q = pool->qstrs, *const *q_top = pool->qstrs + pool->len; q < q_top; q++) {
  348. mp_printf(&mp_plat_print, "Q(%s)\n", *q);
  349. }
  350. }
  351. QSTR_EXIT();
  352. }
  353. #endif
  354. #if MICROPY_ROM_TEXT_COMPRESSION
  355. #ifdef NO_QSTR
  356. // If NO_QSTR is set, it means we're doing QSTR extraction.
  357. // So we won't yet have "genhdr/compressed.data.h"
  358. #else
  359. // Emit the compressed_string_data string.
  360. #define MP_COMPRESSED_DATA(x) STATIC const char *compressed_string_data = x;
  361. #define MP_MATCH_COMPRESSED(a, b)
  362. #include "genhdr/compressed.data.h"
  363. #undef MP_COMPRESSED_DATA
  364. #undef MP_MATCH_COMPRESSED
  365. #endif // NO_QSTR
  366. // This implements the "common word" compression scheme (see makecompresseddata.py) where the most
  367. // common 128 words in error messages are replaced by their index into the list of common words.
  368. // The compressed string data is delimited by setting high bit in the final char of each word.
  369. // e.g. aaaa<0x80|a>bbbbbb<0x80|b>....
  370. // This method finds the n'th string.
  371. STATIC const byte *find_uncompressed_string(uint8_t n) {
  372. const byte *c = (byte *)compressed_string_data;
  373. while (n > 0) {
  374. while ((*c & 0x80) == 0) {
  375. ++c;
  376. }
  377. ++c;
  378. --n;
  379. }
  380. return c;
  381. }
  382. // Given a compressed string in src, decompresses it into dst.
  383. // dst must be large enough (use MP_MAX_UNCOMPRESSED_TEXT_LEN+1).
  384. void mp_decompress_rom_string(byte *dst, const mp_rom_error_text_t src_chr) {
  385. // Skip past the 0xff marker.
  386. const byte *src = (byte *)src_chr + 1;
  387. // Need to add spaces around compressed words, except for the first (i.e. transition from 1<->2).
  388. // 0 = start, 1 = compressed, 2 = regular.
  389. int state = 0;
  390. while (*src) {
  391. if ((byte) * src >= 128) {
  392. if (state != 0) {
  393. *dst++ = ' ';
  394. }
  395. state = 1;
  396. // High bit set, replace with common word.
  397. const byte *word = find_uncompressed_string(*src & 0x7f);
  398. // The word is terminated by the final char having its high bit set.
  399. while ((*word & 0x80) == 0) {
  400. *dst++ = *word++;
  401. }
  402. *dst++ = (*word & 0x7f);
  403. } else {
  404. // Otherwise just copy one char.
  405. if (state == 1) {
  406. *dst++ = ' ';
  407. }
  408. state = 2;
  409. *dst++ = *src;
  410. }
  411. ++src;
  412. }
  413. // Add null-terminator.
  414. *dst = 0;
  415. }
  416. #endif // MICROPY_ROM_TEXT_COMPRESSION