asmthumb.c

/*
 * This file is part of the MicroPython project, http://micropython.org/
 *
 * The MIT License (MIT)
 *
 * Copyright (c) 2013, 2014 Damien P. George
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <stdio.h>
#include <assert.h>
#include <string.h>

#include "py/mpconfig.h"

// wrapper around everything in this file
#if MICROPY_EMIT_THUMB || MICROPY_EMIT_INLINE_THUMB

#include "py/mpstate.h"
#include "py/asmthumb.h"

#ifdef _MSC_VER
#include <intrin.h>

static uint32_t mp_clz(uint32_t x) {
    unsigned long lz = 0;
    return _BitScanReverse(&lz, x) ? (sizeof(x) * 8 - 1) - lz : 0;
}

static uint32_t mp_ctz(uint32_t x) {
    unsigned long tz = 0;
    return _BitScanForward(&tz, x) ? tz : 0;
}
#else
#define mp_clz(x) __builtin_clz(x)
#define mp_ctz(x) __builtin_ctz(x)
#endif
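
// For example, mp_clz(0x00ff0000) == 8 and mp_ctz(0x00ff0000) == 16; the
// constant loader further below uses these counts to spot values that can be
// built with a single 8-bit move followed by a shift.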

#define UNSIGNED_FIT5(x) ((uint32_t)(x) < 32)
#define UNSIGNED_FIT7(x) ((uint32_t)(x) < 128)
#define UNSIGNED_FIT8(x) (((x) & 0xffffff00) == 0)
#define UNSIGNED_FIT16(x) (((x) & 0xffff0000) == 0)
#define SIGNED_FIT8(x) ((((x) & 0xffffff80) == 0) || (((x) & 0xffffff80) == 0xffffff80))
#define SIGNED_FIT9(x) ((((x) & 0xffffff00) == 0) || (((x) & 0xffffff00) == 0xffffff00))
#define SIGNED_FIT12(x) ((((x) & 0xfffff800) == 0) || (((x) & 0xfffff800) == 0xfffff800))
#define SIGNED_FIT23(x) ((((x) & 0xffc00000) == 0) || (((x) & 0xffc00000) == 0xffc00000))
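
// A minimal sketch (not part of the build) of the ranges these macros accept,
// assuming 32-bit two's-complement inputs: SIGNED_FITn passes values whose top
// (33-n) bits are all zeros or all ones.
/*
static void fit_macro_examples(void) {
    assert(UNSIGNED_FIT5(31) && !UNSIGNED_FIT5(32));
    assert(SIGNED_FIT9(255) && SIGNED_FIT9(-256));   // extremes of a 9-bit signed field
    assert(!SIGNED_FIT9(256) && !SIGNED_FIT9(-257)); // one past each extreme
}
*/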

// Note: these actually take an imm12 but the high-bit is not encoded here
#define OP_ADD_W_RRI_HI(reg_src) (0xf200 | (reg_src))
#define OP_ADD_W_RRI_LO(reg_dest, imm11) ((imm11 << 4 & 0x7000) | reg_dest << 8 | (imm11 & 0xff))
#define OP_SUB_W_RRI_HI(reg_src) (0xf2a0 | (reg_src))
#define OP_SUB_W_RRI_LO(reg_dest, imm11) ((imm11 << 4 & 0x7000) | reg_dest << 8 | (imm11 & 0xff))

#define OP_LDR_W_HI(reg_base) (0xf8d0 | (reg_base))
#define OP_LDR_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12))

#define OP_LDRH_W_HI(reg_base) (0xf8b0 | (reg_base))
#define OP_LDRH_W_LO(reg_dest, imm12) ((reg_dest) << 12 | (imm12))

static inline byte *asm_thumb_get_cur_to_write_bytes(asm_thumb_t *as, int n) {
    return mp_asm_base_get_cur_to_write_bytes(&as->base, n);
}

/*
static void asm_thumb_write_byte_1(asm_thumb_t *as, byte b1) {
    byte *c = asm_thumb_get_cur_to_write_bytes(as, 1);
    c[0] = b1;
}
*/

/*
#define IMM32_L0(x) ((x) & 0xff)
#define IMM32_L1(x) (((x) >> 8) & 0xff)
#define IMM32_L2(x) (((x) >> 16) & 0xff)
#define IMM32_L3(x) (((x) >> 24) & 0xff)

static void asm_thumb_write_word32(asm_thumb_t *as, int w32) {
    byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
    c[0] = IMM32_L0(w32);
    c[1] = IMM32_L1(w32);
    c[2] = IMM32_L2(w32);
    c[3] = IMM32_L3(w32);
}
*/

// rlolist is a bit map indicating desired lo-registers
#define OP_PUSH_RLIST(rlolist) (0xb400 | (rlolist))
#define OP_PUSH_RLIST_LR(rlolist) (0xb400 | 0x0100 | (rlolist))
#define OP_POP_RLIST(rlolist) (0xbc00 | (rlolist))
#define OP_POP_RLIST_PC(rlolist) (0xbc00 | 0x0100 | (rlolist))

// The number of words must fit in 7 unsigned bits
#define OP_ADD_SP(num_words) (0xb000 | (num_words))
#define OP_SUB_SP(num_words) (0xb080 | (num_words))

// locals:
//  - stored on the stack in ascending order
//  - numbered 0 through num_locals-1
//  - SP points to first local
//
//  | SP
//  v
//  l0  l1  l2  ...  l(n-1)
//  ^                ^
//  | low address    | high address in RAM
void asm_thumb_entry(asm_thumb_t *as, int num_locals) {
    assert(num_locals >= 0);

    // If this Thumb machine code is run from ARM state then add a prelude
    // to switch to Thumb state for the duration of the function.
    #if MICROPY_DYNAMIC_COMPILER || MICROPY_EMIT_ARM || (defined(__arm__) && !defined(__thumb2__) && !defined(__thumb__))
    #if MICROPY_DYNAMIC_COMPILER
    if (mp_dynamic_compiler.native_arch == MP_NATIVE_ARCH_ARMV6)
    #endif
    {
        asm_thumb_op32(as, 0x4010, 0xe92d); // push {r4, lr}
        asm_thumb_op32(as, 0xe009, 0xe28f); // add lr, pc, 8 + 1
        asm_thumb_op32(as, 0xff3e, 0xe12f); // blx lr
        asm_thumb_op32(as, 0x4010, 0xe8bd); // pop {r4, lr}
        asm_thumb_op32(as, 0xff1e, 0xe12f); // bx lr
    }
    #endif

    // Work out what to push and how many extra spaces to reserve on the stack
    // so that we have enough for all locals and it's aligned to an 8-byte boundary.
    // We push extra regs (r1, r2, r3) to help do the stack adjustment.
    // We probably should just always subtract from sp, since this would be more efficient.
    // For push rlist, the lowest numbered register goes at the lowest address.
    uint reglist;
    uint stack_adjust;
    // don't pop r0 because it's used for the return value
    switch (num_locals) {
        case 0:
            reglist = 0xf2;
            stack_adjust = 0;
            break;

        case 1:
            reglist = 0xf2;
            stack_adjust = 0;
            break;

        case 2:
            reglist = 0xfe;
            stack_adjust = 0;
            break;

        case 3:
            reglist = 0xfe;
            stack_adjust = 0;
            break;

        default:
            reglist = 0xfe;
            stack_adjust = ((num_locals - 3) + 1) & (~1);
            break;
    }
    asm_thumb_op16(as, OP_PUSH_RLIST_LR(reglist));
    if (stack_adjust > 0) {
        if (asm_thumb_allow_armv7m(as)) {
            if (UNSIGNED_FIT7(stack_adjust)) {
                asm_thumb_op16(as, OP_SUB_SP(stack_adjust));
            } else {
                asm_thumb_op32(as, OP_SUB_W_RRI_HI(ASM_THUMB_REG_SP), OP_SUB_W_RRI_LO(ASM_THUMB_REG_SP, stack_adjust * 4));
            }
        } else {
            int adj = stack_adjust;
            // we don't expect the stack_adjust to be massive
            while (!UNSIGNED_FIT7(adj)) {
                asm_thumb_op16(as, OP_SUB_SP(127));
                adj -= 127;
            }
            asm_thumb_op16(as, OP_SUB_SP(adj));
        }
    }
    as->push_reglist = reglist;
    as->stack_adjust = stack_adjust;
}

void asm_thumb_exit(asm_thumb_t *as) {
    if (as->stack_adjust > 0) {
        if (asm_thumb_allow_armv7m(as)) {
            if (UNSIGNED_FIT7(as->stack_adjust)) {
                asm_thumb_op16(as, OP_ADD_SP(as->stack_adjust));
            } else {
                asm_thumb_op32(as, OP_ADD_W_RRI_HI(ASM_THUMB_REG_SP), OP_ADD_W_RRI_LO(ASM_THUMB_REG_SP, as->stack_adjust * 4));
            }
        } else {
            int adj = as->stack_adjust;
            // we don't expect the stack_adjust to be massive
            while (!UNSIGNED_FIT7(adj)) {
                asm_thumb_op16(as, OP_ADD_SP(127));
                adj -= 127;
            }
            asm_thumb_op16(as, OP_ADD_SP(adj));
        }
    }
    asm_thumb_op16(as, OP_POP_RLIST_PC(as->push_reglist));
}
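
// A minimal usage sketch (not part of the build; the emitted body is
// hypothetical): a native function is bracketed by asm_thumb_entry() and
// asm_thumb_exit(), with the return value left in r0 before the epilogue.
/*
static void emit_return_42(asm_thumb_t *as) {
    asm_thumb_entry(as, 0);                                    // prologue: push regs, reserve locals
    asm_thumb_mov_reg_i32_optimised(as, ASM_THUMB_REG_R0, 42); // r0 = 42 (return value)
    asm_thumb_exit(as);                                        // epilogue: restore sp, pop {..., pc}
}
*/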

static mp_uint_t get_label_dest(asm_thumb_t *as, uint label) {
    assert(label < as->base.max_num_labels);
    return as->base.label_offsets[label];
}

void asm_thumb_op16(asm_thumb_t *as, uint op) {
    byte *c = asm_thumb_get_cur_to_write_bytes(as, 2);
    if (c != NULL) {
        // little endian
        c[0] = op;
        c[1] = op >> 8;
    }
}

void asm_thumb_op32(asm_thumb_t *as, uint op1, uint op2) {
    byte *c = asm_thumb_get_cur_to_write_bytes(as, 4);
    if (c != NULL) {
        // little endian, op1 then op2
        c[0] = op1;
        c[1] = op1 >> 8;
        c[2] = op2;
        c[3] = op2 >> 8;
    }
}
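
// A minimal sketch (not part of the build): each 16-bit opcode is stored
// little-endian, so emitting the Thumb NOP encoding 0xbf00 writes the bytes
// 0x00, 0xbf; a 32-bit Thumb-2 instruction is written as two such halfwords,
// first halfword first.
/*
static void op_emit_example(asm_thumb_t *as) {
    asm_thumb_op16(as, 0xbf00); // nop -> bytes 00 bf
}
*/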

#define OP_FORMAT_4(op, rlo_dest, rlo_src) ((op) | ((rlo_src) << 3) | (rlo_dest))

void asm_thumb_format_4(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_src) {
    assert(rlo_dest < ASM_THUMB_REG_R8);
    assert(rlo_src < ASM_THUMB_REG_R8);
    asm_thumb_op16(as, OP_FORMAT_4(op, rlo_dest, rlo_src));
}

void asm_thumb_mov_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_src) {
    uint op_lo;
    if (reg_src < 8) {
        op_lo = reg_src << 3;
    } else {
        op_lo = 0x40 | ((reg_src - 8) << 3);
    }
    if (reg_dest < 8) {
        op_lo |= reg_dest;
    } else {
        op_lo |= 0x80 | (reg_dest - 8);
    }
    // mov reg_dest, reg_src
    asm_thumb_op16(as, 0x4600 | op_lo);
}

// if loading lo half with movw, the i16 value will be zero extended into the r32 register!
void asm_thumb_mov_reg_i16(asm_thumb_t *as, uint mov_op, uint reg_dest, int i16_src) {
    assert(reg_dest < ASM_THUMB_REG_R15);
    // mov[wt] reg_dest, #i16_src
    asm_thumb_op32(as, mov_op | ((i16_src >> 1) & 0x0400) | ((i16_src >> 12) & 0xf), ((i16_src << 4) & 0x7000) | (reg_dest << 8) | (i16_src & 0xff));
}
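
// A worked example (not part of the build; register and value hypothetical) of
// how the 16-bit immediate is split across the two halfwords as imm4:i:imm3:imm8.
// For "movw r3, #0x1234" (i16_src = 0x1234, reg_dest = 3):
//   hi halfword: mov_op | ((0x1234 >> 1) & 0x0400) | ((0x1234 >> 12) & 0xf) = mov_op | 0x0001
//   lo halfword: ((0x1234 << 4) & 0x7000) | (3 << 8) | (0x1234 & 0xff)      = 0x2334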

static void asm_thumb_mov_rlo_i16(asm_thumb_t *as, uint rlo_dest, int i16_src) {
    asm_thumb_mov_rlo_i8(as, rlo_dest, (i16_src >> 8) & 0xff);
    asm_thumb_lsl_rlo_rlo_i5(as, rlo_dest, rlo_dest, 8);
    asm_thumb_add_rlo_i8(as, rlo_dest, i16_src & 0xff);
}

#define OP_B_N(byte_offset) (0xe000 | (((byte_offset) >> 1) & 0x07ff))

bool asm_thumb_b_n_label(asm_thumb_t *as, uint label) {
    mp_uint_t dest = get_label_dest(as, label);
    mp_int_t rel = dest - as->base.code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
    asm_thumb_op16(as, OP_B_N(rel));
    return as->base.pass != MP_ASM_PASS_EMIT || SIGNED_FIT12(rel);
}
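
// A worked example (not part of the build, offsets hypothetical): if the label
// was bound at code_offset 0x10 and this branch is emitted at code_offset 0x20,
// then rel = 0x10 - 0x20 - 4 = -20, which fits the signed 12-bit byte offset of
// a narrow B, so OP_B_N(-20) encodes "b.n" back to the label.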

#define OP_BCC_N(cond, byte_offset) (0xd000 | ((cond) << 8) | (((byte_offset) >> 1) & 0x00ff))

// all these bit-arithmetic operations need coverage testing!
#define OP_BCC_W_HI(cond, byte_offset) (0xf000 | ((cond) << 6) | (((byte_offset) >> 10) & 0x0400) | (((byte_offset) >> 14) & 0x003f))
#define OP_BCC_W_LO(byte_offset) (0x8000 | ((byte_offset) & 0x2000) | (((byte_offset) >> 1) & 0x0fff))

bool asm_thumb_bcc_nw_label(asm_thumb_t *as, int cond, uint label, bool wide) {
    mp_uint_t dest = get_label_dest(as, label);
    mp_int_t rel = dest - as->base.code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
    if (!wide) {
        asm_thumb_op16(as, OP_BCC_N(cond, rel));
        return as->base.pass != MP_ASM_PASS_EMIT || SIGNED_FIT9(rel);
    } else if (asm_thumb_allow_armv7m(as)) {
        asm_thumb_op32(as, OP_BCC_W_HI(cond, rel), OP_BCC_W_LO(rel));
        return true;
    } else {
        // this method should not be called for ARMV6M
        return false;
    }
}

#define OP_BL_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
#define OP_BL_LO(byte_offset) (0xf800 | (((byte_offset) >> 1) & 0x07ff))

bool asm_thumb_bl_label(asm_thumb_t *as, uint label) {
    mp_uint_t dest = get_label_dest(as, label);
    mp_int_t rel = dest - as->base.code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
    asm_thumb_op32(as, OP_BL_HI(rel), OP_BL_LO(rel));
    return as->base.pass != MP_ASM_PASS_EMIT || SIGNED_FIT23(rel);
}

size_t asm_thumb_mov_reg_i32(asm_thumb_t *as, uint reg_dest, mp_uint_t i32) {
    // movw, movt does it in 8 bytes
    // ldr [pc, #], dw does it in 6 bytes, but we might not reach to end of code for dw

    size_t loc = mp_asm_base_get_code_pos(&as->base);

    if (asm_thumb_allow_armv7m(as)) {
        asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVW, reg_dest, i32);
        asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVT, reg_dest, i32 >> 16);
    } else {
        // should only be called with lo reg for ARMV6M
        assert(reg_dest < ASM_THUMB_REG_R8);

        // sanity check that generated code is aligned
        assert(!as->base.code_base || !(3u & (uintptr_t)as->base.code_base));

        // basically:
        //        (nop)
        //        ldr reg_dest, _data
        //        b 1f
        // _data: .word i32
        //  1:
        if (as->base.code_offset & 2u) {
            asm_thumb_op16(as, ASM_THUMB_OP_NOP);
        }
        asm_thumb_ldr_rlo_pcrel_i8(as, reg_dest, 0);
        asm_thumb_op16(as, OP_B_N(2));
        asm_thumb_op16(as, i32 & 0xffff);
        asm_thumb_op16(as, i32 >> 16);
    }

    return loc;
}
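
// A worked example (not part of the build, constant hypothetical): on ARMv6-M,
// loading 0x12345678 into r0 at a 4-byte-aligned code offset emits
//   ldr    r0, [pc, #0]   ; load the inline literal
//   b.n    1f             ; OP_B_N(2) skips over the 4-byte literal
//   .short 0x5678         ; low halfword of the constant
//   .short 0x1234         ; high halfword of the constant
// 1:
// (if the code offset is only 2-byte aligned, the leading nop keeps the literal
// word 4-byte aligned for the PC-relative load).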

void asm_thumb_mov_reg_i32_optimised(asm_thumb_t *as, uint reg_dest, int i32) {
    if (reg_dest < 8 && UNSIGNED_FIT8(i32)) {
        asm_thumb_mov_rlo_i8(as, reg_dest, i32);
    } else if (asm_thumb_allow_armv7m(as)) {
        if (UNSIGNED_FIT16(i32)) {
            asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVW, reg_dest, i32);
        } else {
            asm_thumb_mov_reg_i32(as, reg_dest, i32);
        }
    } else {
        uint rlo_dest = reg_dest;
        assert(rlo_dest < ASM_THUMB_REG_R8); // should never be called for ARMV6M

        bool negate = i32 < 0 && ((i32 + i32) & 0xffffffffu); // don't negate 0x80000000
        if (negate) {
            i32 = -i32;
        }

        uint clz = mp_clz(i32);
        uint ctz = i32 ? mp_ctz(i32) : 0;
        assert(clz + ctz <= 32);
        if (clz + ctz >= 24) {
            asm_thumb_mov_rlo_i8(as, rlo_dest, (i32 >> ctz) & 0xff);
            asm_thumb_lsl_rlo_rlo_i5(as, rlo_dest, rlo_dest, ctz);
        } else if (UNSIGNED_FIT16(i32)) {
            asm_thumb_mov_rlo_i16(as, rlo_dest, i32);
        } else {
            if (negate) {
                // no point in negating if we're storing in 32 bit anyway
                negate = false;
                i32 = -i32;
            }
            asm_thumb_mov_reg_i32(as, rlo_dest, i32);
        }
        if (negate) {
            asm_thumb_neg_rlo_rlo(as, rlo_dest, rlo_dest);
        }
    }
}
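
// A worked example (not part of the build, constant hypothetical): on ARMv6-M the
// constant 0x00ff0000 has mp_clz == 8 and mp_ctz == 16, so clz + ctz >= 24 and it
// is loaded with two 16-bit instructions instead of an inline literal:
//   movs rlo_dest, #0xff
//   lsls rlo_dest, rlo_dest, #16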

#define OP_STR_TO_SP_OFFSET(rlo_dest, word_offset) (0x9000 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))
#define OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset) (0x9800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))

static void asm_thumb_mov_local_check(asm_thumb_t *as, int word_offset) {
    if (as->base.pass >= MP_ASM_PASS_EMIT) {
        assert(word_offset >= 0);
        if (!UNSIGNED_FIT8(word_offset)) {
            mp_raise_NotImplementedError(MP_ERROR_TEXT("too many locals for native method"));
        }
    }
}

void asm_thumb_mov_local_reg(asm_thumb_t *as, int local_num, uint rlo_src) {
    assert(rlo_src < ASM_THUMB_REG_R8);
    int word_offset = local_num;
    asm_thumb_mov_local_check(as, word_offset);
    asm_thumb_op16(as, OP_STR_TO_SP_OFFSET(rlo_src, word_offset));
}

void asm_thumb_mov_reg_local(asm_thumb_t *as, uint rlo_dest, int local_num) {
    assert(rlo_dest < ASM_THUMB_REG_R8);
    int word_offset = local_num;
    asm_thumb_mov_local_check(as, word_offset);
    asm_thumb_op16(as, OP_LDR_FROM_SP_OFFSET(rlo_dest, word_offset));
}

#define OP_ADD_REG_SP_OFFSET(rlo_dest, word_offset) (0xa800 | ((rlo_dest) << 8) | ((word_offset) & 0x00ff))

void asm_thumb_mov_reg_local_addr(asm_thumb_t *as, uint rlo_dest, int local_num) {
    assert(rlo_dest < ASM_THUMB_REG_R8);
    int word_offset = local_num;
    assert(as->base.pass < MP_ASM_PASS_EMIT || word_offset >= 0);
    asm_thumb_op16(as, OP_ADD_REG_SP_OFFSET(rlo_dest, word_offset));
}

void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label) {
    mp_uint_t dest = get_label_dest(as, label);
    mp_int_t rel = dest - as->base.code_offset;
    rel |= 1; // to stay in Thumb state when jumping to this address
    if (asm_thumb_allow_armv7m(as)) {
        rel -= 6 + 4; // adjust for mov_reg_i16, sxth_rlo_rlo and then PC+4 prefetch of add_reg_reg
        asm_thumb_mov_reg_i16(as, ASM_THUMB_OP_MOVW, rlo_dest, rel); // 4 bytes
        asm_thumb_sxth_rlo_rlo(as, rlo_dest, rlo_dest); // 2 bytes
    } else {
        rel -= 8 + 4; // adjust for four instructions and then PC+4 prefetch of add_reg_reg
        // 6 bytes
        asm_thumb_mov_rlo_i16(as, rlo_dest, rel);
        // 2 bytes - not always needed, but we want to keep the size the same
        asm_thumb_sxth_rlo_rlo(as, rlo_dest, rlo_dest);
    }
    asm_thumb_add_reg_reg(as, rlo_dest, ASM_THUMB_REG_R15); // 2 bytes
}

// ARMv7-M only
static inline void asm_thumb_ldr_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
    asm_thumb_op32(as, OP_LDR_W_HI(reg_base), OP_LDR_W_LO(reg_dest, word_offset * 4));
}

// emits code for: reg_dest = reg_base + offset << offset_shift
static void asm_thumb_add_reg_reg_offset(asm_thumb_t *as, uint reg_dest, uint reg_base, uint offset, uint offset_shift) {
    if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8) {
        if (offset << offset_shift < 256) {
            if (reg_dest != reg_base) {
                asm_thumb_mov_reg_reg(as, reg_dest, reg_base);
            }
            asm_thumb_add_rlo_i8(as, reg_dest, offset << offset_shift);
        } else if (UNSIGNED_FIT8(offset) && reg_dest != reg_base) {
            asm_thumb_mov_rlo_i8(as, reg_dest, offset);
            asm_thumb_lsl_rlo_rlo_i5(as, reg_dest, reg_dest, offset_shift);
            asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_base);
        } else if (reg_dest != reg_base) {
            asm_thumb_mov_rlo_i16(as, reg_dest, offset << offset_shift);
            asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_base);
        } else {
            uint reg_other = reg_dest ^ 7;
            asm_thumb_op16(as, OP_PUSH_RLIST((1 << reg_other)));
            asm_thumb_mov_rlo_i16(as, reg_other, offset << offset_shift);
            asm_thumb_add_rlo_rlo_rlo(as, reg_dest, reg_dest, reg_other);
            asm_thumb_op16(as, OP_POP_RLIST((1 << reg_other)));
        }
    } else {
        assert(0); // should never be called for ARMV6M
    }
}

void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset) {
    if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(word_offset)) {
        asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_base, word_offset);
    } else if (asm_thumb_allow_armv7m(as)) {
        asm_thumb_ldr_reg_reg_i12(as, reg_dest, reg_base, word_offset);
    } else {
        asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, word_offset - 31, 2);
        asm_thumb_ldr_rlo_rlo_i5(as, reg_dest, reg_dest, 31);
    }
}

// ARMv7-M only
static inline void asm_thumb_ldrh_reg_reg_i12(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) {
    asm_thumb_op32(as, OP_LDRH_W_HI(reg_base), OP_LDRH_W_LO(reg_dest, uint16_offset * 2));
}

void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset) {
    if (reg_dest < ASM_THUMB_REG_R8 && reg_base < ASM_THUMB_REG_R8 && UNSIGNED_FIT5(uint16_offset)) {
        asm_thumb_ldrh_rlo_rlo_i5(as, reg_dest, reg_base, uint16_offset);
    } else if (asm_thumb_allow_armv7m(as)) {
        asm_thumb_ldrh_reg_reg_i12(as, reg_dest, reg_base, uint16_offset);
    } else {
        asm_thumb_add_reg_reg_offset(as, reg_dest, reg_base, uint16_offset - 31, 1);
        asm_thumb_ldrh_rlo_rlo_i5(as, reg_dest, reg_dest, 31);
    }
}

// this could be wrong, because it should have a range of +/- 16MiB...
#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff))

void asm_thumb_b_label(asm_thumb_t *as, uint label) {
    mp_uint_t dest = get_label_dest(as, label);
    mp_int_t rel = dest - as->base.code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction

    if (dest != (mp_uint_t)-1 && rel <= -4) {
        // is a backwards jump, so we know the size of the jump on the first pass
        // calculate rel assuming 12 bit relative jump
        if (SIGNED_FIT12(rel)) {
            asm_thumb_op16(as, OP_B_N(rel));
            return;
        }
    }

    // is a large backwards jump, or a forwards jump (that must be assumed large)
    if (asm_thumb_allow_armv7m(as)) {
        asm_thumb_op32(as, OP_BW_HI(rel), OP_BW_LO(rel));
    } else {
        if (SIGNED_FIT12(rel)) {
            // this code path has to be the same number of instructions irrespective of rel
            asm_thumb_op16(as, OP_B_N(rel));
        } else {
            asm_thumb_op16(as, ASM_THUMB_OP_NOP);
            if (dest != (mp_uint_t)-1) {
                // we have an actual branch > 12 bits; this is not handled yet
                mp_raise_NotImplementedError(MP_ERROR_TEXT("native method too big"));
            }
        }
    }
}

void asm_thumb_bcc_label(asm_thumb_t *as, int cond, uint label) {
    mp_uint_t dest = get_label_dest(as, label);
    mp_int_t rel = dest - as->base.code_offset;
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction

    if (dest != (mp_uint_t)-1 && rel <= -4) {
        // is a backwards jump, so we know the size of the jump on the first pass
        // calculate rel assuming 9 bit relative jump
        if (SIGNED_FIT9(rel)) {
            asm_thumb_op16(as, OP_BCC_N(cond, rel));
            return;
        }
    }

    // is a large backwards jump, or a forwards jump (that must be assumed large)
    if (asm_thumb_allow_armv7m(as)) {
        asm_thumb_op32(as, OP_BCC_W_HI(cond, rel), OP_BCC_W_LO(rel));
    } else {
        // reverse the sense of the branch to jump over a longer branch
        asm_thumb_op16(as, OP_BCC_N(cond ^ 1, 0));
        asm_thumb_b_label(as, label);
    }
}

void asm_thumb_bcc_rel9(asm_thumb_t *as, int cond, int rel) {
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
    assert(SIGNED_FIT9(rel));
    asm_thumb_op16(as, OP_BCC_N(cond, rel));
}

void asm_thumb_b_rel12(asm_thumb_t *as, int rel) {
    rel -= 4; // account for instruction prefetch, PC is 4 bytes ahead of this instruction
    assert(SIGNED_FIT12(rel));
    asm_thumb_op16(as, OP_B_N(rel));
}

#define OP_BLX(reg) (0x4780 | ((reg) << 3))
#define OP_SVC(arg) (0xdf00 | (arg))

void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp) {
    // Load ptr to function from table, indexed by fun_id, then call it
    asm_thumb_ldr_reg_reg_i12_optimised(as, reg_temp, ASM_THUMB_REG_FUN_TABLE, fun_id);
    asm_thumb_op16(as, OP_BLX(reg_temp));
}
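
// A minimal sketch (not part of the build; reg_temp and fun_id are hypothetical)
// of what asm_thumb_bl_ind emits: a load of the target address out of the
// function table held in ASM_THUMB_REG_FUN_TABLE, followed by an indirect call:
//   ldr reg_temp, [fun_table_reg, #fun_id * 4]
//   blx reg_temp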

#endif // MICROPY_EMIT_THUMB || MICROPY_EMIT_INLINE_THUMB