asm_arm_mult_square.inc 97 KB


  1. /* Copyright 2015, Kenneth MacKay. Licensed under the BSD 2-clause license. */
  2. #ifndef _UECC_ASM_ARM_MULT_SQUARE_H_
  3. #define _UECC_ASM_ARM_MULT_SQUARE_H_
  /*
   * FAST_MULT_ASM_5: straight-line ARM assembly (emitted as adjacent C string
   * literals) that multiplies two 5-word operands of 32-bit words into a
   * 10-word product.
   *
   * Entry:  r0 = result pointer, r1 = left operand pointer, r2 = right operand
   *         pointer (left/right naming follows the comments in the *_TO_*
   *         macros of this file).
   * Exit:   result[0..9] holds the product; r0 = result + 40 bytes,
   *         r1 = left + 20 bytes, r2 = right + 20 bytes.
   * r3 is pushed at the start and popped at the end because it is reused as a
   * scratch register in between. r4-r12, r14 and the condition flags are
   * overwritten.
   *
   * Method: the product is built column by column (column k = sum of all
   * left[i]*right[j] with i + j == k). A first pass computes the block
   * left[0..1] x right[3..4] and parks it in result[3..6]; a second pass walks
   * columns 0..8, adding the remaining products and, where a parked word
   * exists, reading it back from [r0] before overwriting it. Each column uses a
   * three-register accumulator (low, mid, high) driven by adds/adcs/adc; the
   * register playing each role rotates from one column to the next.
   */
  4. #define FAST_MULT_ASM_5 \
  5. "push {r3} \n\t" \
  6. "add r0, 12 \n\t" \
  7. "add r2, 12 \n\t" \
  8. "ldmia r1!, {r3,r4} \n\t" \
  9. "ldmia r2!, {r6,r7} \n\t" \
  10. /* r3,r4 = left[0..1]; r6,r7 = right[3..4]. Column 3 (partial): left[0]*right[3] -> result[3] */ \
  11. "umull r11, r12, r3, r6 \n\t" \
  12. "stmia r0!, {r11} \n\t" \
  13. /* column 4 (partial): left[0]*right[4] + left[1]*right[3] -> result[4] */ \
  14. "mov r10, #0 \n\t" \
  15. "umull r11, r9, r3, r7 \n\t" \
  16. "adds r12, r12, r11 \n\t" \
  17. "adc r9, r9, #0 \n\t" \
  18. "umull r11, r14, r4, r6 \n\t" \
  19. "adds r12, r12, r11 \n\t" \
  20. "adcs r9, r9, r14 \n\t" \
  21. "adc r10, r10, #0 \n\t" \
  22. "stmia r0!, {r12} \n\t" \
  23. /* column 5 (partial): left[1]*right[4]; the two remaining words go to result[5..6] */ \
  24. "umull r12, r14, r4, r7 \n\t" \
  25. "adds r9, r9, r12 \n\t" \
  26. "adc r10, r10, r14 \n\t" \
  27. "stmia r0!, {r9, r10} \n\t" \
  28. /* second pass: rewind r0 to result[0] and r2 to right[0]; load right[0..2] and left[2] */ \
  29. "sub r0, 28 \n\t" \
  30. "sub r2, 20 \n\t" \
  31. "ldmia r2!, {r6,r7,r8} \n\t" \
  32. "ldmia r1!, {r5} \n\t" \
  33. /* column 0: left[0]*right[0] -> result[0] */ \
  34. "umull r11, r12, r3, r6 \n\t" \
  35. "stmia r0!, {r11} \n\t" \
  36. /* column 1: left[0]*right[1] + left[1]*right[0] -> result[1] */ \
  37. "mov r10, #0 \n\t" \
  38. "umull r11, r9, r3, r7 \n\t" \
  39. "adds r12, r12, r11 \n\t" \
  40. "adc r9, r9, #0 \n\t" \
  41. "umull r11, r14, r4, r6 \n\t" \
  42. "adds r12, r12, r11 \n\t" \
  43. "adcs r9, r9, r14 \n\t" \
  44. "adc r10, r10, #0 \n\t" \
  45. "stmia r0!, {r12} \n\t" \
  46. /* column 2: left[0]*right[2] + left[1]*right[1] + left[2]*right[0] -> result[2] */ \
  47. "mov r11, #0 \n\t" \
  48. "umull r12, r14, r3, r8 \n\t" \
  49. "adds r9, r9, r12 \n\t" \
  50. "adcs r10, r10, r14 \n\t" \
  51. "adc r11, r11, #0 \n\t" \
  52. "umull r12, r14, r4, r7 \n\t" \
  53. "adds r9, r9, r12 \n\t" \
  54. "adcs r10, r10, r14 \n\t" \
  55. "adc r11, r11, #0 \n\t" \
  56. "umull r12, r14, r5, r6 \n\t" \
  57. "adds r9, r9, r12 \n\t" \
  58. "adcs r10, r10, r14 \n\t" \
  59. "adc r11, r11, #0 \n\t" \
  60. "stmia r0!, {r9} \n\t" \
  61. /* column 3: r3 is reloaded with left[3]; three new products plus the word parked at result[3] */ \
  62. "ldmia r1!, {r3} \n\t" \
  63. "mov r12, #0 \n\t" \
  64. "umull r14, r9, r4, r8 \n\t" \
  65. "adds r10, r10, r14 \n\t" \
  66. "adcs r11, r11, r9 \n\t" \
  67. "adc r12, r12, #0 \n\t" \
  68. "umull r14, r9, r5, r7 \n\t" \
  69. "adds r10, r10, r14 \n\t" \
  70. "adcs r11, r11, r9 \n\t" \
  71. "adc r12, r12, #0 \n\t" \
  72. "umull r14, r9, r3, r6 \n\t" \
  73. "adds r10, r10, r14 \n\t" \
  74. "adcs r11, r11, r9 \n\t" \
  75. "adc r12, r12, #0 \n\t" \
  76. "ldr r14, [r0] \n\t" \
  77. "adds r10, r10, r14 \n\t" \
  78. "adcs r11, r11, #0 \n\t" \
  79. "adc r12, r12, #0 \n\t" \
  80. "stmia r0!, {r10} \n\t" \
  81. /* column 4: r4 is reloaded with left[4]; three new products plus the word parked at result[4] */ \
  82. "ldmia r1!, {r4} \n\t" \
  83. "mov r14, #0 \n\t" \
  84. "umull r9, r10, r5, r8 \n\t" \
  85. "adds r11, r11, r9 \n\t" \
  86. "adcs r12, r12, r10 \n\t" \
  87. "adc r14, r14, #0 \n\t" \
  88. "umull r9, r10, r3, r7 \n\t" \
  89. "adds r11, r11, r9 \n\t" \
  90. "adcs r12, r12, r10 \n\t" \
  91. "adc r14, r14, #0 \n\t" \
  92. "umull r9, r10, r4, r6 \n\t" \
  93. "adds r11, r11, r9 \n\t" \
  94. "adcs r12, r12, r10 \n\t" \
  95. "adc r14, r14, #0 \n\t" \
  96. "ldr r9, [r0] \n\t" \
  97. "adds r11, r11, r9 \n\t" \
  98. "adcs r12, r12, #0 \n\t" \
  99. "adc r14, r14, #0 \n\t" \
  100. "stmia r0!, {r11} \n\t" \
  101. /* column 5: r6 is reloaded with right[3]; three new products plus the word parked at result[5] */ \
  102. "ldmia r2!, {r6} \n\t" \
  103. "mov r9, #0 \n\t" \
  104. "umull r10, r11, r5, r6 \n\t" \
  105. "adds r12, r12, r10 \n\t" \
  106. "adcs r14, r14, r11 \n\t" \
  107. "adc r9, r9, #0 \n\t" \
  108. "umull r10, r11, r3, r8 \n\t" \
  109. "adds r12, r12, r10 \n\t" \
  110. "adcs r14, r14, r11 \n\t" \
  111. "adc r9, r9, #0 \n\t" \
  112. "umull r10, r11, r4, r7 \n\t" \
  113. "adds r12, r12, r10 \n\t" \
  114. "adcs r14, r14, r11 \n\t" \
  115. "adc r9, r9, #0 \n\t" \
  116. "ldr r10, [r0] \n\t" \
  117. "adds r12, r12, r10 \n\t" \
  118. "adcs r14, r14, #0 \n\t" \
  119. "adc r9, r9, #0 \n\t" \
  120. "stmia r0!, {r12} \n\t" \
  121. /* column 6: r7 is reloaded with right[4]; three new products plus the word parked at result[6] */ \
  122. "ldmia r2!, {r7} \n\t" \
  123. "mov r10, #0 \n\t" \
  124. "umull r11, r12, r5, r7 \n\t" \
  125. "adds r14, r14, r11 \n\t" \
  126. "adcs r9, r9, r12 \n\t" \
  127. "adc r10, r10, #0 \n\t" \
  128. "umull r11, r12, r3, r6 \n\t" \
  129. "adds r14, r14, r11 \n\t" \
  130. "adcs r9, r9, r12 \n\t" \
  131. "adc r10, r10, #0 \n\t" \
  132. "umull r11, r12, r4, r8 \n\t" \
  133. "adds r14, r14, r11 \n\t" \
  134. "adcs r9, r9, r12 \n\t" \
  135. "adc r10, r10, #0 \n\t" \
  136. "ldr r11, [r0] \n\t" \
  137. "adds r14, r14, r11 \n\t" \
  138. "adcs r9, r9, #0 \n\t" \
  139. "adc r10, r10, #0 \n\t" \
  140. "stmia r0!, {r14} \n\t" \
  141. /* column 7: left[3]*right[4] + left[4]*right[3] -> result[7] (nothing parked here) */ \
  142. "mov r11, #0 \n\t" \
  143. "umull r12, r14, r3, r7 \n\t" \
  144. "adds r9, r9, r12 \n\t" \
  145. "adcs r10, r10, r14 \n\t" \
  146. "adc r11, r11, #0 \n\t" \
  147. "umull r12, r14, r4, r6 \n\t" \
  148. "adds r9, r9, r12 \n\t" \
  149. "adcs r10, r10, r14 \n\t" \
  150. "adc r11, r11, #0 \n\t" \
  151. "stmia r0!, {r9} \n\t" \
  152. /* column 8: left[4]*right[4]; the final two words go to result[8..9] */ \
  153. "umull r14, r9, r4, r7 \n\t" \
  154. "adds r10, r10, r14 \n\t" \
  155. "adc r11, r11, r9 \n\t" \
  156. "stmia r0!, {r10, r11} \n\t" \
  157. "pop {r3} \n\t"
  /*
   * FAST_MULT_ASM_5_TO_6: extends a 5x5-word product already stored in
   * result[0..9] to the 6x6-word product in result[0..11].
   *
   * If r3 == 5 the whole block is skipped by branching forward to local
   * label 1, which is not defined in this macro and must be supplied by the
   * code that instantiates it. r3 is presumably the operand word count set up
   * by the caller -- confirm at the use site.
   *
   * Entry state expected by the pointer arithmetic below (this is exactly what
   * FAST_MULT_ASM_5 leaves behind): r0 = result + 40 bytes, r1 = left + 20,
   * r2 = right + 20, so [r1] and [r2] are word 5 of each operand.
   * Exit: r0 = result + 48 bytes, r1 = left + 24, r2 = right + 24.
   * Overwrites r4-r12, r14 and the condition flags.
   *
   * For i = 0..4 it adds left[5]*right[i] + right[5]*left[i] into result[5+i],
   * carrying through a rotating three-register accumulator, and finally adds
   * left[5]*right[5] to the remaining carry to produce result[10..11].
   */
  158. #define FAST_MULT_ASM_5_TO_6 \
  159. "cmp r3, #5 \n\t" \
  160. "beq 1f \n\t" \
  161. /* r3 != 5: fold word 5 of each operand into the existing product */ \
  162. /* r4 = left high, r5 = right high */ \
  163. "ldr r4, [r1] \n\t" \
  164. "ldr r5, [r2] \n\t" \
  165. /* rewind 5 words: r0 -> result[5], r1 -> left[0], r2 -> right[0] */ \
  166. "sub r0, #20 \n\t" \
  167. "sub r1, #20 \n\t" \
  168. "sub r2, #20 \n\t" \
  169. /* i = 0: result[5] += left[5]*right[0] + right[5]*left[0] */ \
  170. "ldr r6, [r0] \n\t" \
  171. "ldr r7, [r1], #4 \n\t" \
  172. "ldr r8, [r2], #4 \n\t" \
  173. "mov r14, #0 \n\t" \
  174. "umull r9, r10, r4, r8 \n\t" \
  175. "umull r11, r12, r5, r7 \n\t" \
  176. "adds r9, r9, r6 \n\t" \
  177. "adc r10, r10, #0 \n\t" \
  178. "adds r9, r9, r11 \n\t" \
  179. "adcs r10, r10, r12 \n\t" \
  180. "adc r14, r14, #0 \n\t" \
  181. "str r9, [r0], #4 \n\t" \
  182. /* i = 1: result[6] += left[5]*right[1] + right[5]*left[1] + carry */ \
  183. "ldr r6, [r0] \n\t" \
  184. "adds r10, r10, r6 \n\t" \
  185. "adcs r14, r14, #0 \n\t" \
  186. "ldr r7, [r1], #4 \n\t" \
  187. "ldr r8, [r2], #4 \n\t" \
  188. "mov r9, #0 \n\t" \
  189. "umull r11, r12, r4, r8 \n\t" \
  190. "adds r10, r10, r11 \n\t" \
  191. "adcs r14, r14, r12 \n\t" \
  192. "adc r9, r9, #0 \n\t" \
  193. "umull r11, r12, r5, r7 \n\t" \
  194. "adds r10, r10, r11 \n\t" \
  195. "adcs r14, r14, r12 \n\t" \
  196. "adc r9, r9, #0 \n\t" \
  197. "str r10, [r0], #4 \n\t" \
  198. /* i = 2: result[7] += left[5]*right[2] + right[5]*left[2] + carry */ \
  199. "ldr r6, [r0] \n\t" \
  200. "adds r14, r14, r6 \n\t" \
  201. "adcs r9, r9, #0 \n\t" \
  202. "ldr r7, [r1], #4 \n\t" \
  203. "ldr r8, [r2], #4 \n\t" \
  204. "mov r10, #0 \n\t" \
  205. "umull r11, r12, r4, r8 \n\t" \
  206. "adds r14, r14, r11 \n\t" \
  207. "adcs r9, r9, r12 \n\t" \
  208. "adc r10, r10, #0 \n\t" \
  209. "umull r11, r12, r5, r7 \n\t" \
  210. "adds r14, r14, r11 \n\t" \
  211. "adcs r9, r9, r12 \n\t" \
  212. "adc r10, r10, #0 \n\t" \
  213. "str r14, [r0], #4 \n\t" \
  214. /* i = 3: result[8] += left[5]*right[3] + right[5]*left[3] + carry */ \
  215. "ldr r6, [r0] \n\t" \
  216. "adds r9, r9, r6 \n\t" \
  217. "adcs r10, r10, #0 \n\t" \
  218. "ldr r7, [r1], #4 \n\t" \
  219. "ldr r8, [r2], #4 \n\t" \
  220. "mov r14, #0 \n\t" \
  221. "umull r11, r12, r4, r8 \n\t" \
  222. "adds r9, r9, r11 \n\t" \
  223. "adcs r10, r10, r12 \n\t" \
  224. "adc r14, r14, #0 \n\t" \
  225. "umull r11, r12, r5, r7 \n\t" \
  226. "adds r9, r9, r11 \n\t" \
  227. "adcs r10, r10, r12 \n\t" \
  228. "adc r14, r14, #0 \n\t" \
  229. "str r9, [r0], #4 \n\t" \
  230. /* i = 4: result[9] += left[5]*right[4] + right[5]*left[4] + carry */ \
  231. "ldr r6, [r0] \n\t" \
  232. "adds r10, r10, r6 \n\t" \
  233. "adcs r14, r14, #0 \n\t" \
  234. /* skip past already-loaded (r4, r5) */ \
  235. "ldr r7, [r1], #8 \n\t" \
  236. "ldr r8, [r2], #8 \n\t" \
  237. "mov r9, #0 \n\t" \
  238. "umull r11, r12, r4, r8 \n\t" \
  239. "adds r10, r10, r11 \n\t" \
  240. "adcs r14, r14, r12 \n\t" \
  241. "adc r9, r9, #0 \n\t" \
  242. "umull r11, r12, r5, r7 \n\t" \
  243. "adds r10, r10, r11 \n\t" \
  244. "adcs r14, r14, r12 \n\t" \
  245. "adc r9, r9, #0 \n\t" \
  246. "str r10, [r0], #4 \n\t" \
  247. /* top: left[5]*right[5] plus the remaining carry words -> result[10..11] */ \
  248. "umull r11, r12, r4, r5 \n\t" \
  249. "adds r11, r11, r14 \n\t" \
  250. "adc r12, r12, r9 \n\t" \
  251. "stmia r0!, {r11, r12} \n\t"
  /*
   * FAST_MULT_ASM_6: straight-line ARM assembly (emitted as adjacent C string
   * literals) that multiplies two 6-word operands of 32-bit words into a
   * 12-word product.
   *
   * Entry:  r0 = result pointer, r1 = left operand pointer, r2 = right operand
   *         pointer (left/right naming follows the comments in the *_TO_*
   *         macros of this file).
   * Exit:   result[0..11] holds the product; r0 = result + 48 bytes,
   *         r1 = left + 24 bytes, r2 = right + 24 bytes.
   * r3 is pushed at the start and popped at the end because it is reused as a
   * scratch register in between. r4-r12, r14 and the condition flags are
   * overwritten.
   *
   * Method: a first pass computes the block left[0..2] x right[3..5] and parks
   * it in result[3..8]; a second pass walks columns 0..10 (column k = sum of
   * left[i]*right[j] with i + j == k), adding the remaining products and, for
   * columns 3..8, the parked word read back from [r0]. The three-register
   * carry accumulator rotates from one column to the next.
   */
  252. #define FAST_MULT_ASM_6 \
  253. "push {r3} \n\t" \
  254. "add r0, 12 \n\t" \
  255. "add r2, 12 \n\t" \
  256. "ldmia r1!, {r3,r4,r5} \n\t" \
  257. "ldmia r2!, {r6,r7,r8} \n\t" \
  258. /* r3..r5 = left[0..2]; r6..r8 = right[3..5]. Column 3 (partial): left[0]*right[3] -> result[3] */ \
  259. "umull r11, r12, r3, r6 \n\t" \
  260. "stmia r0!, {r11} \n\t" \
  261. /* column 4 (partial): left[0]*right[4] + left[1]*right[3] -> result[4] */ \
  262. "mov r10, #0 \n\t" \
  263. "umull r11, r9, r3, r7 \n\t" \
  264. "adds r12, r12, r11 \n\t" \
  265. "adc r9, r9, #0 \n\t" \
  266. "umull r11, r14, r4, r6 \n\t" \
  267. "adds r12, r12, r11 \n\t" \
  268. "adcs r9, r9, r14 \n\t" \
  269. "adc r10, r10, #0 \n\t" \
  270. "stmia r0!, {r12} \n\t" \
  271. /* column 5 (partial): left[0]*right[5] + left[1]*right[4] + left[2]*right[3] -> result[5] */ \
  272. "mov r11, #0 \n\t" \
  273. "umull r12, r14, r3, r8 \n\t" \
  274. "adds r9, r9, r12 \n\t" \
  275. "adcs r10, r10, r14 \n\t" \
  276. "adc r11, r11, #0 \n\t" \
  277. "umull r12, r14, r4, r7 \n\t" \
  278. "adds r9, r9, r12 \n\t" \
  279. "adcs r10, r10, r14 \n\t" \
  280. "adc r11, r11, #0 \n\t" \
  281. "umull r12, r14, r5, r6 \n\t" \
  282. "adds r9, r9, r12 \n\t" \
  283. "adcs r10, r10, r14 \n\t" \
  284. "adc r11, r11, #0 \n\t" \
  285. "stmia r0!, {r9} \n\t" \
  286. /* column 6 (partial): left[1]*right[5] + left[2]*right[4] -> result[6] */ \
  287. "mov r12, #0 \n\t" \
  288. "umull r14, r9, r4, r8 \n\t" \
  289. "adds r10, r10, r14 \n\t" \
  290. "adcs r11, r11, r9 \n\t" \
  291. "adc r12, r12, #0 \n\t" \
  292. "umull r14, r9, r5, r7 \n\t" \
  293. "adds r10, r10, r14 \n\t" \
  294. "adcs r11, r11, r9 \n\t" \
  295. "adc r12, r12, #0 \n\t" \
  296. "stmia r0!, {r10} \n\t" \
  297. /* column 7 (partial): left[2]*right[5]; the two remaining words go to result[7..8] */ \
  298. "umull r9, r10, r5, r8 \n\t" \
  299. "adds r11, r11, r9 \n\t" \
  300. "adc r12, r12, r10 \n\t" \
  301. "stmia r0!, {r11, r12} \n\t" \
  302. /* second pass: rewind r0 to result[0] and r2 to right[0]; load right[0..2] */ \
  303. "sub r0, 36 \n\t" \
  304. "sub r2, 24 \n\t" \
  305. "ldmia r2!, {r6,r7,r8} \n\t" \
  306. /* column 0: left[0]*right[0] -> result[0] */ \
  307. "umull r11, r12, r3, r6 \n\t" \
  308. "stmia r0!, {r11} \n\t" \
  309. /* column 1: left[0]*right[1] + left[1]*right[0] -> result[1] */ \
  310. "mov r10, #0 \n\t" \
  311. "umull r11, r9, r3, r7 \n\t" \
  312. "adds r12, r12, r11 \n\t" \
  313. "adc r9, r9, #0 \n\t" \
  314. "umull r11, r14, r4, r6 \n\t" \
  315. "adds r12, r12, r11 \n\t" \
  316. "adcs r9, r9, r14 \n\t" \
  317. "adc r10, r10, #0 \n\t" \
  318. "stmia r0!, {r12} \n\t" \
  319. /* column 2: left[0]*right[2] + left[1]*right[1] + left[2]*right[0] -> result[2] */ \
  320. "mov r11, #0 \n\t" \
  321. "umull r12, r14, r3, r8 \n\t" \
  322. "adds r9, r9, r12 \n\t" \
  323. "adcs r10, r10, r14 \n\t" \
  324. "adc r11, r11, #0 \n\t" \
  325. "umull r12, r14, r4, r7 \n\t" \
  326. "adds r9, r9, r12 \n\t" \
  327. "adcs r10, r10, r14 \n\t" \
  328. "adc r11, r11, #0 \n\t" \
  329. "umull r12, r14, r5, r6 \n\t" \
  330. "adds r9, r9, r12 \n\t" \
  331. "adcs r10, r10, r14 \n\t" \
  332. "adc r11, r11, #0 \n\t" \
  333. "stmia r0!, {r9} \n\t" \
  334. /* column 3: r3 is reloaded with left[3]; three new products plus the word parked at result[3] */ \
  335. "ldmia r1!, {r3} \n\t" \
  336. "mov r12, #0 \n\t" \
  337. "umull r14, r9, r4, r8 \n\t" \
  338. "adds r10, r10, r14 \n\t" \
  339. "adcs r11, r11, r9 \n\t" \
  340. "adc r12, r12, #0 \n\t" \
  341. "umull r14, r9, r5, r7 \n\t" \
  342. "adds r10, r10, r14 \n\t" \
  343. "adcs r11, r11, r9 \n\t" \
  344. "adc r12, r12, #0 \n\t" \
  345. "umull r14, r9, r3, r6 \n\t" \
  346. "adds r10, r10, r14 \n\t" \
  347. "adcs r11, r11, r9 \n\t" \
  348. "adc r12, r12, #0 \n\t" \
  349. "ldr r14, [r0] \n\t" \
  350. "adds r10, r10, r14 \n\t" \
  351. "adcs r11, r11, #0 \n\t" \
  352. "adc r12, r12, #0 \n\t" \
  353. "stmia r0!, {r10} \n\t" \
  354. /* column 4: r4 is reloaded with left[4]; three new products plus the word parked at result[4] */ \
  355. "ldmia r1!, {r4} \n\t" \
  356. "mov r14, #0 \n\t" \
  357. "umull r9, r10, r5, r8 \n\t" \
  358. "adds r11, r11, r9 \n\t" \
  359. "adcs r12, r12, r10 \n\t" \
  360. "adc r14, r14, #0 \n\t" \
  361. "umull r9, r10, r3, r7 \n\t" \
  362. "adds r11, r11, r9 \n\t" \
  363. "adcs r12, r12, r10 \n\t" \
  364. "adc r14, r14, #0 \n\t" \
  365. "umull r9, r10, r4, r6 \n\t" \
  366. "adds r11, r11, r9 \n\t" \
  367. "adcs r12, r12, r10 \n\t" \
  368. "adc r14, r14, #0 \n\t" \
  369. "ldr r9, [r0] \n\t" \
  370. "adds r11, r11, r9 \n\t" \
  371. "adcs r12, r12, #0 \n\t" \
  372. "adc r14, r14, #0 \n\t" \
  373. "stmia r0!, {r11} \n\t" \
  374. /* column 5: r5 is reloaded with left[5]; three new products plus the word parked at result[5] */ \
  375. "ldmia r1!, {r5} \n\t" \
  376. "mov r9, #0 \n\t" \
  377. "umull r10, r11, r3, r8 \n\t" \
  378. "adds r12, r12, r10 \n\t" \
  379. "adcs r14, r14, r11 \n\t" \
  380. "adc r9, r9, #0 \n\t" \
  381. "umull r10, r11, r4, r7 \n\t" \
  382. "adds r12, r12, r10 \n\t" \
  383. "adcs r14, r14, r11 \n\t" \
  384. "adc r9, r9, #0 \n\t" \
  385. "umull r10, r11, r5, r6 \n\t" \
  386. "adds r12, r12, r10 \n\t" \
  387. "adcs r14, r14, r11 \n\t" \
  388. "adc r9, r9, #0 \n\t" \
  389. "ldr r10, [r0] \n\t" \
  390. "adds r12, r12, r10 \n\t" \
  391. "adcs r14, r14, #0 \n\t" \
  392. "adc r9, r9, #0 \n\t" \
  393. "stmia r0!, {r12} \n\t" \
  394. /* column 6: r6 is reloaded with right[3]; three new products plus the word parked at result[6] */ \
  395. "ldmia r2!, {r6} \n\t" \
  396. "mov r10, #0 \n\t" \
  397. "umull r11, r12, r3, r6 \n\t" \
  398. "adds r14, r14, r11 \n\t" \
  399. "adcs r9, r9, r12 \n\t" \
  400. "adc r10, r10, #0 \n\t" \
  401. "umull r11, r12, r4, r8 \n\t" \
  402. "adds r14, r14, r11 \n\t" \
  403. "adcs r9, r9, r12 \n\t" \
  404. "adc r10, r10, #0 \n\t" \
  405. "umull r11, r12, r5, r7 \n\t" \
  406. "adds r14, r14, r11 \n\t" \
  407. "adcs r9, r9, r12 \n\t" \
  408. "adc r10, r10, #0 \n\t" \
  409. "ldr r11, [r0] \n\t" \
  410. "adds r14, r14, r11 \n\t" \
  411. "adcs r9, r9, #0 \n\t" \
  412. "adc r10, r10, #0 \n\t" \
  413. "stmia r0!, {r14} \n\t" \
  414. /* column 7: r7 is reloaded with right[4]; three new products plus the word parked at result[7] */ \
  415. "ldmia r2!, {r7} \n\t" \
  416. "mov r11, #0 \n\t" \
  417. "umull r12, r14, r3, r7 \n\t" \
  418. "adds r9, r9, r12 \n\t" \
  419. "adcs r10, r10, r14 \n\t" \
  420. "adc r11, r11, #0 \n\t" \
  421. "umull r12, r14, r4, r6 \n\t" \
  422. "adds r9, r9, r12 \n\t" \
  423. "adcs r10, r10, r14 \n\t" \
  424. "adc r11, r11, #0 \n\t" \
  425. "umull r12, r14, r5, r8 \n\t" \
  426. "adds r9, r9, r12 \n\t" \
  427. "adcs r10, r10, r14 \n\t" \
  428. "adc r11, r11, #0 \n\t" \
  429. "ldr r12, [r0] \n\t" \
  430. "adds r9, r9, r12 \n\t" \
  431. "adcs r10, r10, #0 \n\t" \
  432. "adc r11, r11, #0 \n\t" \
  433. "stmia r0!, {r9} \n\t" \
  434. /* column 8: r8 is reloaded with right[5]; three new products plus the word parked at result[8] */ \
  435. "ldmia r2!, {r8} \n\t" \
  436. "mov r12, #0 \n\t" \
  437. "umull r14, r9, r3, r8 \n\t" \
  438. "adds r10, r10, r14 \n\t" \
  439. "adcs r11, r11, r9 \n\t" \
  440. "adc r12, r12, #0 \n\t" \
  441. "umull r14, r9, r4, r7 \n\t" \
  442. "adds r10, r10, r14 \n\t" \
  443. "adcs r11, r11, r9 \n\t" \
  444. "adc r12, r12, #0 \n\t" \
  445. "umull r14, r9, r5, r6 \n\t" \
  446. "adds r10, r10, r14 \n\t" \
  447. "adcs r11, r11, r9 \n\t" \
  448. "adc r12, r12, #0 \n\t" \
  449. "ldr r14, [r0] \n\t" \
  450. "adds r10, r10, r14 \n\t" \
  451. "adcs r11, r11, #0 \n\t" \
  452. "adc r12, r12, #0 \n\t" \
  453. "stmia r0!, {r10} \n\t" \
  454. /* column 9: left[4]*right[5] + left[5]*right[4] -> result[9] (nothing parked here) */ \
  455. "mov r14, #0 \n\t" \
  456. "umull r9, r10, r4, r8 \n\t" \
  457. "adds r11, r11, r9 \n\t" \
  458. "adcs r12, r12, r10 \n\t" \
  459. "adc r14, r14, #0 \n\t" \
  460. "umull r9, r10, r5, r7 \n\t" \
  461. "adds r11, r11, r9 \n\t" \
  462. "adcs r12, r12, r10 \n\t" \
  463. "adc r14, r14, #0 \n\t" \
  464. "stmia r0!, {r11} \n\t" \
  465. /* column 10: left[5]*right[5]; the final two words go to result[10..11] */ \
  466. "umull r10, r11, r5, r8 \n\t" \
  467. "adds r12, r12, r10 \n\t" \
  468. "adc r14, r14, r11 \n\t" \
  469. "stmia r0!, {r12, r14} \n\t" \
  470. "pop {r3} \n\t"
  /*
   * FAST_MULT_ASM_6_TO_7: extends a 6x6-word product already stored in
   * result[0..11] to the 7x7-word product in result[0..13].
   *
   * If r3 == 6 the whole block is skipped by branching forward to local
   * label 1, which is not defined in this macro and must be supplied by the
   * code that instantiates it. r3 is presumably the operand word count set up
   * by the caller -- confirm at the use site.
   *
   * Entry state expected by the pointer arithmetic below (this is exactly what
   * FAST_MULT_ASM_6 leaves behind): r0 = result + 48 bytes, r1 = left + 24,
   * r2 = right + 24, so [r1] and [r2] are word 6 of each operand.
   * Exit: r0 = result + 56 bytes, r1 = left + 28, r2 = right + 28.
   * Overwrites r4-r12, r14 and the condition flags.
   *
   * For i = 0..5 it adds left[6]*right[i] + right[6]*left[i] into result[6+i],
   * carrying through a rotating three-register accumulator, and finally adds
   * left[6]*right[6] to the remaining carry to produce result[12..13].
   */
  471. #define FAST_MULT_ASM_6_TO_7 \
  472. "cmp r3, #6 \n\t" \
  473. "beq 1f \n\t" \
  474. /* r3 != 6: fold word 6 of each operand into the existing product */ \
  475. /* r4 = left high, r5 = right high */ \
  476. "ldr r4, [r1] \n\t" \
  477. "ldr r5, [r2] \n\t" \
  478. /* rewind 6 words: r0 -> result[6], r1 -> left[0], r2 -> right[0] */ \
  479. "sub r0, #24 \n\t" \
  480. "sub r1, #24 \n\t" \
  481. "sub r2, #24 \n\t" \
  482. /* i = 0: result[6] += left[6]*right[0] + right[6]*left[0] */ \
  483. "ldr r6, [r0] \n\t" \
  484. "ldr r7, [r1], #4 \n\t" \
  485. "ldr r8, [r2], #4 \n\t" \
  486. "mov r14, #0 \n\t" \
  487. "umull r9, r10, r4, r8 \n\t" \
  488. "umull r11, r12, r5, r7 \n\t" \
  489. "adds r9, r9, r6 \n\t" \
  490. "adc r10, r10, #0 \n\t" \
  491. "adds r9, r9, r11 \n\t" \
  492. "adcs r10, r10, r12 \n\t" \
  493. "adc r14, r14, #0 \n\t" \
  494. "str r9, [r0], #4 \n\t" \
  495. /* i = 1: result[7] += left[6]*right[1] + right[6]*left[1] + carry */ \
  496. "ldr r6, [r0] \n\t" \
  497. "adds r10, r10, r6 \n\t" \
  498. "adcs r14, r14, #0 \n\t" \
  499. "ldr r7, [r1], #4 \n\t" \
  500. "ldr r8, [r2], #4 \n\t" \
  501. "mov r9, #0 \n\t" \
  502. "umull r11, r12, r4, r8 \n\t" \
  503. "adds r10, r10, r11 \n\t" \
  504. "adcs r14, r14, r12 \n\t" \
  505. "adc r9, r9, #0 \n\t" \
  506. "umull r11, r12, r5, r7 \n\t" \
  507. "adds r10, r10, r11 \n\t" \
  508. "adcs r14, r14, r12 \n\t" \
  509. "adc r9, r9, #0 \n\t" \
  510. "str r10, [r0], #4 \n\t" \
  511. /* i = 2: result[8] += left[6]*right[2] + right[6]*left[2] + carry */ \
  512. "ldr r6, [r0] \n\t" \
  513. "adds r14, r14, r6 \n\t" \
  514. "adcs r9, r9, #0 \n\t" \
  515. "ldr r7, [r1], #4 \n\t" \
  516. "ldr r8, [r2], #4 \n\t" \
  517. "mov r10, #0 \n\t" \
  518. "umull r11, r12, r4, r8 \n\t" \
  519. "adds r14, r14, r11 \n\t" \
  520. "adcs r9, r9, r12 \n\t" \
  521. "adc r10, r10, #0 \n\t" \
  522. "umull r11, r12, r5, r7 \n\t" \
  523. "adds r14, r14, r11 \n\t" \
  524. "adcs r9, r9, r12 \n\t" \
  525. "adc r10, r10, #0 \n\t" \
  526. "str r14, [r0], #4 \n\t" \
  527. /* i = 3: result[9] += left[6]*right[3] + right[6]*left[3] + carry */ \
  528. "ldr r6, [r0] \n\t" \
  529. "adds r9, r9, r6 \n\t" \
  530. "adcs r10, r10, #0 \n\t" \
  531. "ldr r7, [r1], #4 \n\t" \
  532. "ldr r8, [r2], #4 \n\t" \
  533. "mov r14, #0 \n\t" \
  534. "umull r11, r12, r4, r8 \n\t" \
  535. "adds r9, r9, r11 \n\t" \
  536. "adcs r10, r10, r12 \n\t" \
  537. "adc r14, r14, #0 \n\t" \
  538. "umull r11, r12, r5, r7 \n\t" \
  539. "adds r9, r9, r11 \n\t" \
  540. "adcs r10, r10, r12 \n\t" \
  541. "adc r14, r14, #0 \n\t" \
  542. "str r9, [r0], #4 \n\t" \
  543. /* i = 4: result[10] += left[6]*right[4] + right[6]*left[4] + carry */ \
  544. "ldr r6, [r0] \n\t" \
  545. "adds r10, r10, r6 \n\t" \
  546. "adcs r14, r14, #0 \n\t" \
  547. "ldr r7, [r1], #4 \n\t" \
  548. "ldr r8, [r2], #4 \n\t" \
  549. "mov r9, #0 \n\t" \
  550. "umull r11, r12, r4, r8 \n\t" \
  551. "adds r10, r10, r11 \n\t" \
  552. "adcs r14, r14, r12 \n\t" \
  553. "adc r9, r9, #0 \n\t" \
  554. "umull r11, r12, r5, r7 \n\t" \
  555. "adds r10, r10, r11 \n\t" \
  556. "adcs r14, r14, r12 \n\t" \
  557. "adc r9, r9, #0 \n\t" \
  558. "str r10, [r0], #4 \n\t" \
  559. /* i = 5: result[11] += left[6]*right[5] + right[6]*left[5] + carry */ \
  560. "ldr r6, [r0] \n\t" \
  561. "adds r14, r14, r6 \n\t" \
  562. "adcs r9, r9, #0 \n\t" \
  563. /* skip past already-loaded (r4, r5) */ \
  564. "ldr r7, [r1], #8 \n\t" \
  565. "ldr r8, [r2], #8 \n\t" \
  566. "mov r10, #0 \n\t" \
  567. "umull r11, r12, r4, r8 \n\t" \
  568. "adds r14, r14, r11 \n\t" \
  569. "adcs r9, r9, r12 \n\t" \
  570. "adc r10, r10, #0 \n\t" \
  571. "umull r11, r12, r5, r7 \n\t" \
  572. "adds r14, r14, r11 \n\t" \
  573. "adcs r9, r9, r12 \n\t" \
  574. "adc r10, r10, #0 \n\t" \
  575. "str r14, [r0], #4 \n\t" \
  576. /* top: left[6]*right[6] plus the remaining carry words -> result[12..13] */ \
  577. "umull r11, r12, r4, r5 \n\t" \
  578. "adds r11, r11, r9 \n\t" \
  579. "adc r12, r12, r10 \n\t" \
  580. "stmia r0!, {r11, r12} \n\t"
  /*
   * FAST_MULT_ASM_7: straight-line ARM assembly (emitted as adjacent C string
   * literals) that multiplies two 7-word operands of 32-bit words into a
   * 14-word product.
   *
   * Entry:  r0 = result pointer, r1 = left operand pointer, r2 = right operand
   *         pointer (left/right naming follows the comments in the *_TO_*
   *         macros of this file).
   * Exit:   result[0..13] holds the product; r0 = result + 56 bytes,
   *         r1 = left + 28 bytes, r2 = right + 28 bytes.
   * r3 is pushed at the start and popped at the end because it is reused as a
   * scratch register in between. r4-r12, r14 and the condition flags are
   * overwritten.
   *
   * Method: a first pass computes the block left[0..3] x right[3..6] and parks
   * it in result[3..10] (starting with the single product left[0]*right[6],
   * stored to result[6..7] and then folded back in); a second pass walks
   * columns 0..12 (column k = sum of left[i]*right[j] with i + j == k), adding
   * the remaining products and, for columns 3..10, the parked word read back
   * from [r0]. The three-register carry accumulator rotates between columns.
   */
  581. #define FAST_MULT_ASM_7 \
  582. "push {r3} \n\t" \
  583. "add r0, 24 \n\t" \
  584. "add r2, 24 \n\t" \
  585. "ldmia r1!, {r3} \n\t" \
  586. "ldmia r2!, {r6} \n\t" \
  587. /* r3 = left[0], r6 = right[6]; store both words of left[0]*right[6] to result[6..7] */ \
  588. "umull r9, r10, r3, r6 \n\t" \
  589. "stmia r0!, {r9, r10} \n\t" \
  590. /* move r0 back to result[3] and r2 back to right[3]; load right[3..5] and left[1..2] */ \
  591. "sub r0, 20 \n\t" \
  592. "sub r2, 16 \n\t" \
  593. "ldmia r2!, {r6, r7, r8} \n\t" \
  594. "ldmia r1!, {r4, r5} \n\t" \
  595. /* column 3 (partial): left[0]*right[3] -> result[3] */ \
  596. "umull r9, r10, r3, r6 \n\t" \
  597. "stmia r0!, {r9} \n\t" \
  598. /* column 4 (partial): left[0]*right[4] + left[1]*right[3] -> result[4] */ \
  599. "mov r14, #0 \n\t" \
  600. "umull r9, r12, r3, r7 \n\t" \
  601. "adds r10, r10, r9 \n\t" \
  602. "adc r12, r12, #0 \n\t" \
  603. "umull r9, r11, r4, r6 \n\t" \
  604. "adds r10, r10, r9 \n\t" \
  605. "adcs r12, r12, r11 \n\t" \
  606. "adc r14, r14, #0 \n\t" \
  607. "stmia r0!, {r10} \n\t" \
  608. /* column 5 (partial): left[0]*right[5] + left[1]*right[4] + left[2]*right[3] -> result[5] */ \
  609. "mov r9, #0 \n\t" \
  610. "umull r10, r11, r3, r8 \n\t" \
  611. "adds r12, r12, r10 \n\t" \
  612. "adcs r14, r14, r11 \n\t" \
  613. "adc r9, r9, #0 \n\t" \
  614. "umull r10, r11, r4, r7 \n\t" \
  615. "adds r12, r12, r10 \n\t" \
  616. "adcs r14, r14, r11 \n\t" \
  617. "adc r9, r9, #0 \n\t" \
  618. "umull r10, r11, r5, r6 \n\t" \
  619. "adds r12, r12, r10 \n\t" \
  620. "adcs r14, r14, r11 \n\t" \
  621. "adc r9, r9, #0 \n\t" \
  622. "stmia r0!, {r12} \n\t" \
  623. /* column 6 (partial): r3 is reloaded with left[3]; three products plus the low word of left[0]*right[6] at result[6] */ \
  624. "ldmia r1!, {r3} \n\t" \
  625. "mov r10, #0 \n\t" \
  626. "umull r11, r12, r4, r8 \n\t" \
  627. "adds r14, r14, r11 \n\t" \
  628. "adcs r9, r9, r12 \n\t" \
  629. "adc r10, r10, #0 \n\t" \
  630. "umull r11, r12, r5, r7 \n\t" \
  631. "adds r14, r14, r11 \n\t" \
  632. "adcs r9, r9, r12 \n\t" \
  633. "adc r10, r10, #0 \n\t" \
  634. "umull r11, r12, r3, r6 \n\t" \
  635. "adds r14, r14, r11 \n\t" \
  636. "adcs r9, r9, r12 \n\t" \
  637. "adc r10, r10, #0 \n\t" \
  638. "ldr r11, [r0] \n\t" \
  639. "adds r14, r14, r11 \n\t" \
  640. "adcs r9, r9, #0 \n\t" \
  641. "adc r10, r10, #0 \n\t" \
  642. "stmia r0!, {r14} \n\t" \
  643. /* column 7 (partial): r6 is reloaded with right[6]; three products plus the high word of left[0]*right[6] at result[7] */ \
  644. "ldmia r2!, {r6} \n\t" \
  645. "mov r11, #0 \n\t" \
  646. "umull r12, r14, r4, r6 \n\t" \
  647. "adds r9, r9, r12 \n\t" \
  648. "adcs r10, r10, r14 \n\t" \
  649. "adc r11, r11, #0 \n\t" \
  650. "umull r12, r14, r5, r8 \n\t" \
  651. "adds r9, r9, r12 \n\t" \
  652. "adcs r10, r10, r14 \n\t" \
  653. "adc r11, r11, #0 \n\t" \
  654. "umull r12, r14, r3, r7 \n\t" \
  655. "adds r9, r9, r12 \n\t" \
  656. "adcs r10, r10, r14 \n\t" \
  657. "adc r11, r11, #0 \n\t" \
  658. "ldr r12, [r0] \n\t" \
  659. "adds r9, r9, r12 \n\t" \
  660. "adcs r10, r10, #0 \n\t" \
  661. "adc r11, r11, #0 \n\t" \
  662. "stmia r0!, {r9} \n\t" \
  663. /* column 8 (partial): left[2]*right[6] + left[3]*right[5] -> result[8] */ \
  664. "mov r12, #0 \n\t" \
  665. "umull r14, r9, r5, r6 \n\t" \
  666. "adds r10, r10, r14 \n\t" \
  667. "adcs r11, r11, r9 \n\t" \
  668. "adc r12, r12, #0 \n\t" \
  669. "umull r14, r9, r3, r8 \n\t" \
  670. "adds r10, r10, r14 \n\t" \
  671. "adcs r11, r11, r9 \n\t" \
  672. "adc r12, r12, #0 \n\t" \
  673. "stmia r0!, {r10} \n\t" \
  674. /* column 9 (partial): left[3]*right[6]; the two remaining words go to result[9..10] */ \
  675. "umull r9, r10, r3, r6 \n\t" \
  676. "adds r11, r11, r9 \n\t" \
  677. "adc r12, r12, r10 \n\t" \
  678. "stmia r0!, {r11, r12} \n\t" \
  679. /* second pass: rewind r0, r1, r2 to word 0; load left[0..2] and right[0..2] */ \
  680. "sub r0, 44 \n\t" \
  681. "sub r1, 16 \n\t" \
  682. "sub r2, 28 \n\t" \
  683. "ldmia r1!, {r3,r4,r5} \n\t" \
  684. "ldmia r2!, {r6,r7,r8} \n\t" \
  685. /* column 0: left[0]*right[0] -> result[0] */ \
  686. "umull r9, r10, r3, r6 \n\t" \
  687. "stmia r0!, {r9} \n\t" \
  688. /* column 1: left[0]*right[1] + left[1]*right[0] -> result[1] */ \
  689. "mov r14, #0 \n\t" \
  690. "umull r9, r12, r3, r7 \n\t" \
  691. "adds r10, r10, r9 \n\t" \
  692. "adc r12, r12, #0 \n\t" \
  693. "umull r9, r11, r4, r6 \n\t" \
  694. "adds r10, r10, r9 \n\t" \
  695. "adcs r12, r12, r11 \n\t" \
  696. "adc r14, r14, #0 \n\t" \
  697. "stmia r0!, {r10} \n\t" \
  698. /* column 2: left[0]*right[2] + left[1]*right[1] + left[2]*right[0] -> result[2] */ \
  699. "mov r9, #0 \n\t" \
  700. "umull r10, r11, r3, r8 \n\t" \
  701. "adds r12, r12, r10 \n\t" \
  702. "adcs r14, r14, r11 \n\t" \
  703. "adc r9, r9, #0 \n\t" \
  704. "umull r10, r11, r4, r7 \n\t" \
  705. "adds r12, r12, r10 \n\t" \
  706. "adcs r14, r14, r11 \n\t" \
  707. "adc r9, r9, #0 \n\t" \
  708. "umull r10, r11, r5, r6 \n\t" \
  709. "adds r12, r12, r10 \n\t" \
  710. "adcs r14, r14, r11 \n\t" \
  711. "adc r9, r9, #0 \n\t" \
  712. "stmia r0!, {r12} \n\t" \
  713. /* column 3: r3 is reloaded with left[3]; three new products plus the word parked at result[3] */ \
  714. "ldmia r1!, {r3} \n\t" \
  715. "mov r10, #0 \n\t" \
  716. "umull r11, r12, r4, r8 \n\t" \
  717. "adds r14, r14, r11 \n\t" \
  718. "adcs r9, r9, r12 \n\t" \
  719. "adc r10, r10, #0 \n\t" \
  720. "umull r11, r12, r5, r7 \n\t" \
  721. "adds r14, r14, r11 \n\t" \
  722. "adcs r9, r9, r12 \n\t" \
  723. "adc r10, r10, #0 \n\t" \
  724. "umull r11, r12, r3, r6 \n\t" \
  725. "adds r14, r14, r11 \n\t" \
  726. "adcs r9, r9, r12 \n\t" \
  727. "adc r10, r10, #0 \n\t" \
  728. "ldr r11, [r0] \n\t" \
  729. "adds r14, r14, r11 \n\t" \
  730. "adcs r9, r9, #0 \n\t" \
  731. "adc r10, r10, #0 \n\t" \
  732. "stmia r0!, {r14} \n\t" \
  733. /* column 4: r4 is reloaded with left[4]; three new products plus the word parked at result[4] */ \
  734. "ldmia r1!, {r4} \n\t" \
  735. "mov r11, #0 \n\t" \
  736. "umull r12, r14, r5, r8 \n\t" \
  737. "adds r9, r9, r12 \n\t" \
  738. "adcs r10, r10, r14 \n\t" \
  739. "adc r11, r11, #0 \n\t" \
  740. "umull r12, r14, r3, r7 \n\t" \
  741. "adds r9, r9, r12 \n\t" \
  742. "adcs r10, r10, r14 \n\t" \
  743. "adc r11, r11, #0 \n\t" \
  744. "umull r12, r14, r4, r6 \n\t" \
  745. "adds r9, r9, r12 \n\t" \
  746. "adcs r10, r10, r14 \n\t" \
  747. "adc r11, r11, #0 \n\t" \
  748. "ldr r12, [r0] \n\t" \
  749. "adds r9, r9, r12 \n\t" \
  750. "adcs r10, r10, #0 \n\t" \
  751. "adc r11, r11, #0 \n\t" \
  752. "stmia r0!, {r9} \n\t" \
  753. /* column 5: r5 is reloaded with left[5]; three new products plus the word parked at result[5] */ \
  754. "ldmia r1!, {r5} \n\t" \
  755. "mov r12, #0 \n\t" \
  756. "umull r14, r9, r3, r8 \n\t" \
  757. "adds r10, r10, r14 \n\t" \
  758. "adcs r11, r11, r9 \n\t" \
  759. "adc r12, r12, #0 \n\t" \
  760. "umull r14, r9, r4, r7 \n\t" \
  761. "adds r10, r10, r14 \n\t" \
  762. "adcs r11, r11, r9 \n\t" \
  763. "adc r12, r12, #0 \n\t" \
  764. "umull r14, r9, r5, r6 \n\t" \
  765. "adds r10, r10, r14 \n\t" \
  766. "adcs r11, r11, r9 \n\t" \
  767. "adc r12, r12, #0 \n\t" \
  768. "ldr r14, [r0] \n\t" \
  769. "adds r10, r10, r14 \n\t" \
  770. "adcs r11, r11, #0 \n\t" \
  771. "adc r12, r12, #0 \n\t" \
  772. "stmia r0!, {r10} \n\t" \
  773. /* column 6: r3 is reloaded with left[6]; three new products plus the word parked at result[6] */ \
  774. "ldmia r1!, {r3} \n\t" \
  775. "mov r14, #0 \n\t" \
  776. "umull r9, r10, r4, r8 \n\t" \
  777. "adds r11, r11, r9 \n\t" \
  778. "adcs r12, r12, r10 \n\t" \
  779. "adc r14, r14, #0 \n\t" \
  780. "umull r9, r10, r5, r7 \n\t" \
  781. "adds r11, r11, r9 \n\t" \
  782. "adcs r12, r12, r10 \n\t" \
  783. "adc r14, r14, #0 \n\t" \
  784. "umull r9, r10, r3, r6 \n\t" \
  785. "adds r11, r11, r9 \n\t" \
  786. "adcs r12, r12, r10 \n\t" \
  787. "adc r14, r14, #0 \n\t" \
  788. "ldr r9, [r0] \n\t" \
  789. "adds r11, r11, r9 \n\t" \
  790. "adcs r12, r12, #0 \n\t" \
  791. "adc r14, r14, #0 \n\t" \
  792. "stmia r0!, {r11} \n\t" \
  793. /* column 7: r6 is reloaded with right[3]; three new products plus the word parked at result[7] */ \
  794. "ldmia r2!, {r6} \n\t" \
  795. "mov r9, #0 \n\t" \
  796. "umull r10, r11, r4, r6 \n\t" \
  797. "adds r12, r12, r10 \n\t" \
  798. "adcs r14, r14, r11 \n\t" \
  799. "adc r9, r9, #0 \n\t" \
  800. "umull r10, r11, r5, r8 \n\t" \
  801. "adds r12, r12, r10 \n\t" \
  802. "adcs r14, r14, r11 \n\t" \
  803. "adc r9, r9, #0 \n\t" \
  804. "umull r10, r11, r3, r7 \n\t" \
  805. "adds r12, r12, r10 \n\t" \
  806. "adcs r14, r14, r11 \n\t" \
  807. "adc r9, r9, #0 \n\t" \
  808. "ldr r10, [r0] \n\t" \
  809. "adds r12, r12, r10 \n\t" \
  810. "adcs r14, r14, #0 \n\t" \
  811. "adc r9, r9, #0 \n\t" \
  812. "stmia r0!, {r12} \n\t" \
  813. /* column 8: r7 is reloaded with right[4]; three new products plus the word parked at result[8] */ \
  814. "ldmia r2!, {r7} \n\t" \
  815. "mov r10, #0 \n\t" \
  816. "umull r11, r12, r4, r7 \n\t" \
  817. "adds r14, r14, r11 \n\t" \
  818. "adcs r9, r9, r12 \n\t" \
  819. "adc r10, r10, #0 \n\t" \
  820. "umull r11, r12, r5, r6 \n\t" \
  821. "adds r14, r14, r11 \n\t" \
  822. "adcs r9, r9, r12 \n\t" \
  823. "adc r10, r10, #0 \n\t" \
  824. "umull r11, r12, r3, r8 \n\t" \
  825. "adds r14, r14, r11 \n\t" \
  826. "adcs r9, r9, r12 \n\t" \
  827. "adc r10, r10, #0 \n\t" \
  828. "ldr r11, [r0] \n\t" \
  829. "adds r14, r14, r11 \n\t" \
  830. "adcs r9, r9, #0 \n\t" \
  831. "adc r10, r10, #0 \n\t" \
  832. "stmia r0!, {r14} \n\t" \
  833. /* column 9: r8 is reloaded with right[5]; three new products plus the word parked at result[9] */ \
  834. "ldmia r2!, {r8} \n\t" \
  835. "mov r11, #0 \n\t" \
  836. "umull r12, r14, r4, r8 \n\t" \
  837. "adds r9, r9, r12 \n\t" \
  838. "adcs r10, r10, r14 \n\t" \
  839. "adc r11, r11, #0 \n\t" \
  840. "umull r12, r14, r5, r7 \n\t" \
  841. "adds r9, r9, r12 \n\t" \
  842. "adcs r10, r10, r14 \n\t" \
  843. "adc r11, r11, #0 \n\t" \
  844. "umull r12, r14, r3, r6 \n\t" \
  845. "adds r9, r9, r12 \n\t" \
  846. "adcs r10, r10, r14 \n\t" \
  847. "adc r11, r11, #0 \n\t" \
  848. "ldr r12, [r0] \n\t" \
  849. "adds r9, r9, r12 \n\t" \
  850. "adcs r10, r10, #0 \n\t" \
  851. "adc r11, r11, #0 \n\t" \
  852. "stmia r0!, {r9} \n\t" \
  853. /* column 10: r6 is reloaded with right[6]; three new products plus the word parked at result[10] */ \
  854. "ldmia r2!, {r6} \n\t" \
  855. "mov r12, #0 \n\t" \
  856. "umull r14, r9, r4, r6 \n\t" \
  857. "adds r10, r10, r14 \n\t" \
  858. "adcs r11, r11, r9 \n\t" \
  859. "adc r12, r12, #0 \n\t" \
  860. "umull r14, r9, r5, r8 \n\t" \
  861. "adds r10, r10, r14 \n\t" \
  862. "adcs r11, r11, r9 \n\t" \
  863. "adc r12, r12, #0 \n\t" \
  864. "umull r14, r9, r3, r7 \n\t" \
  865. "adds r10, r10, r14 \n\t" \
  866. "adcs r11, r11, r9 \n\t" \
  867. "adc r12, r12, #0 \n\t" \
  868. "ldr r14, [r0] \n\t" \
  869. "adds r10, r10, r14 \n\t" \
  870. "adcs r11, r11, #0 \n\t" \
  871. "adc r12, r12, #0 \n\t" \
  872. "stmia r0!, {r10} \n\t" \
  873. /* column 11: left[5]*right[6] + left[6]*right[5] -> result[11] (nothing parked here) */ \
  874. "mov r14, #0 \n\t" \
  875. "umull r9, r10, r5, r6 \n\t" \
  876. "adds r11, r11, r9 \n\t" \
  877. "adcs r12, r12, r10 \n\t" \
  878. "adc r14, r14, #0 \n\t" \
  879. "umull r9, r10, r3, r8 \n\t" \
  880. "adds r11, r11, r9 \n\t" \
  881. "adcs r12, r12, r10 \n\t" \
  882. "adc r14, r14, #0 \n\t" \
  883. "stmia r0!, {r11} \n\t" \
  884. /* column 12: left[6]*right[6]; the final two words go to result[12..13] */ \
  885. "umull r10, r11, r3, r6 \n\t" \
  886. "adds r12, r12, r10 \n\t" \
  887. "adc r14, r14, r11 \n\t" \
  888. "stmia r0!, {r12, r14} \n\t" \
  889. "pop {r3} \n\t"
  /*
   * FAST_MULT_ASM_7_TO_8: extends a 7x7-word product already stored in
   * result[0..13] to the 8x8-word product in result[0..15].
   *
   * If r3 == 7 the whole block is skipped by branching forward to local
   * label 1, which is not defined in this macro and must be supplied by the
   * code that instantiates it. r3 is presumably the operand word count set up
   * by the caller -- confirm at the use site.
   *
   * Entry state expected by the pointer arithmetic below (this is exactly what
   * FAST_MULT_ASM_7 leaves behind): r0 = result + 56 bytes, r1 = left + 28,
   * r2 = right + 28, so [r1] and [r2] are word 7 of each operand.
   * Exit: r0 = result + 64 bytes, r1 = left + 32, r2 = right + 32.
   * Overwrites r4-r12, r14 and the condition flags.
   *
   * For i = 0..6 it adds left[7]*right[i] + right[7]*left[i] into result[7+i],
   * carrying through a rotating three-register accumulator, and finally adds
   * left[7]*right[7] to the remaining carry to produce result[14..15].
   */
  890. #define FAST_MULT_ASM_7_TO_8 \
  891. "cmp r3, #7 \n\t" \
  892. "beq 1f \n\t" \
  893. /* r3 != 7: fold word 7 of each operand into the existing product */ \
  894. /* r4 = left high, r5 = right high */ \
  895. "ldr r4, [r1] \n\t" \
  896. "ldr r5, [r2] \n\t" \
  897. /* rewind 7 words: r0 -> result[7], r1 -> left[0], r2 -> right[0] */ \
  898. "sub r0, #28 \n\t" \
  899. "sub r1, #28 \n\t" \
  900. "sub r2, #28 \n\t" \
  901. /* i = 0: result[7] += left[7]*right[0] + right[7]*left[0] */ \
  902. "ldr r6, [r0] \n\t" \
  903. "ldr r7, [r1], #4 \n\t" \
  904. "ldr r8, [r2], #4 \n\t" \
  905. "mov r14, #0 \n\t" \
  906. "umull r9, r10, r4, r8 \n\t" \
  907. "umull r11, r12, r5, r7 \n\t" \
  908. "adds r9, r9, r6 \n\t" \
  909. "adc r10, r10, #0 \n\t" \
  910. "adds r9, r9, r11 \n\t" \
  911. "adcs r10, r10, r12 \n\t" \
  912. "adc r14, r14, #0 \n\t" \
  913. "str r9, [r0], #4 \n\t" \
  914. /* i = 1: result[8] += left[7]*right[1] + right[7]*left[1] + carry */ \
  915. "ldr r6, [r0] \n\t" \
  916. "adds r10, r10, r6 \n\t" \
  917. "adcs r14, r14, #0 \n\t" \
  918. "ldr r7, [r1], #4 \n\t" \
  919. "ldr r8, [r2], #4 \n\t" \
  920. "mov r9, #0 \n\t" \
  921. "umull r11, r12, r4, r8 \n\t" \
  922. "adds r10, r10, r11 \n\t" \
  923. "adcs r14, r14, r12 \n\t" \
  924. "adc r9, r9, #0 \n\t" \
  925. "umull r11, r12, r5, r7 \n\t" \
  926. "adds r10, r10, r11 \n\t" \
  927. "adcs r14, r14, r12 \n\t" \
  928. "adc r9, r9, #0 \n\t" \
  929. "str r10, [r0], #4 \n\t" \
  930. /* i = 2: result[9] += left[7]*right[2] + right[7]*left[2] + carry */ \
  931. "ldr r6, [r0] \n\t" \
  932. "adds r14, r14, r6 \n\t" \
  933. "adcs r9, r9, #0 \n\t" \
  934. "ldr r7, [r1], #4 \n\t" \
  935. "ldr r8, [r2], #4 \n\t" \
  936. "mov r10, #0 \n\t" \
  937. "umull r11, r12, r4, r8 \n\t" \
  938. "adds r14, r14, r11 \n\t" \
  939. "adcs r9, r9, r12 \n\t" \
  940. "adc r10, r10, #0 \n\t" \
  941. "umull r11, r12, r5, r7 \n\t" \
  942. "adds r14, r14, r11 \n\t" \
  943. "adcs r9, r9, r12 \n\t" \
  944. "adc r10, r10, #0 \n\t" \
  945. "str r14, [r0], #4 \n\t" \
  946. /* i = 3: result[10] += left[7]*right[3] + right[7]*left[3] + carry */ \
  947. "ldr r6, [r0] \n\t" \
  948. "adds r9, r9, r6 \n\t" \
  949. "adcs r10, r10, #0 \n\t" \
  950. "ldr r7, [r1], #4 \n\t" \
  951. "ldr r8, [r2], #4 \n\t" \
  952. "mov r14, #0 \n\t" \
  953. "umull r11, r12, r4, r8 \n\t" \
  954. "adds r9, r9, r11 \n\t" \
  955. "adcs r10, r10, r12 \n\t" \
  956. "adc r14, r14, #0 \n\t" \
  957. "umull r11, r12, r5, r7 \n\t" \
  958. "adds r9, r9, r11 \n\t" \
  959. "adcs r10, r10, r12 \n\t" \
  960. "adc r14, r14, #0 \n\t" \
  961. "str r9, [r0], #4 \n\t" \
  962. /* i = 4: result[11] += left[7]*right[4] + right[7]*left[4] + carry */ \
  963. "ldr r6, [r0] \n\t" \
  964. "adds r10, r10, r6 \n\t" \
  965. "adcs r14, r14, #0 \n\t" \
  966. "ldr r7, [r1], #4 \n\t" \
  967. "ldr r8, [r2], #4 \n\t" \
  968. "mov r9, #0 \n\t" \
  969. "umull r11, r12, r4, r8 \n\t" \
  970. "adds r10, r10, r11 \n\t" \
  971. "adcs r14, r14, r12 \n\t" \
  972. "adc r9, r9, #0 \n\t" \
  973. "umull r11, r12, r5, r7 \n\t" \
  974. "adds r10, r10, r11 \n\t" \
  975. "adcs r14, r14, r12 \n\t" \
  976. "adc r9, r9, #0 \n\t" \
  977. "str r10, [r0], #4 \n\t" \
  978. /* i = 5: result[12] += left[7]*right[5] + right[7]*left[5] + carry */ \
  979. "ldr r6, [r0] \n\t" \
  980. "adds r14, r14, r6 \n\t" \
  981. "adcs r9, r9, #0 \n\t" \
  982. "ldr r7, [r1], #4 \n\t" \
  983. "ldr r8, [r2], #4 \n\t" \
  984. "mov r10, #0 \n\t" \
  985. "umull r11, r12, r4, r8 \n\t" \
  986. "adds r14, r14, r11 \n\t" \
  987. "adcs r9, r9, r12 \n\t" \
  988. "adc r10, r10, #0 \n\t" \
  989. "umull r11, r12, r5, r7 \n\t" \
  990. "adds r14, r14, r11 \n\t" \
  991. "adcs r9, r9, r12 \n\t" \
  992. "adc r10, r10, #0 \n\t" \
  993. "str r14, [r0], #4 \n\t" \
  994. /* i = 6: result[13] += left[7]*right[6] + right[7]*left[6] + carry */ \
  995. "ldr r6, [r0] \n\t" \
  996. "adds r9, r9, r6 \n\t" \
  997. "adcs r10, r10, #0 \n\t" \
  998. /* skip past already-loaded (r4, r5) */ \
  999. "ldr r7, [r1], #8 \n\t" \
  1000. "ldr r8, [r2], #8 \n\t" \
  1001. "mov r14, #0 \n\t" \
  1002. "umull r11, r12, r4, r8 \n\t" \
  1003. "adds r9, r9, r11 \n\t" \
  1004. "adcs r10, r10, r12 \n\t" \
  1005. "adc r14, r14, #0 \n\t" \
  1006. "umull r11, r12, r5, r7 \n\t" \
  1007. "adds r9, r9, r11 \n\t" \
  1008. "adcs r10, r10, r12 \n\t" \
  1009. "adc r14, r14, #0 \n\t" \
  1010. "str r9, [r0], #4 \n\t" \
  1011. /* top: left[7]*right[7] plus the remaining carry words -> result[14..15] */ \
  1012. "umull r11, r12, r4, r5 \n\t" \
  1013. "adds r11, r11, r10 \n\t" \
  1014. "adc r12, r12, r14 \n\t" \
  1015. "stmia r0!, {r11, r12} \n\t"
  /* FAST_MULT_ASM_8: straight-line 8x8-word schoolbook multiply (32-bit words).
   * Notation used below: a[i] is the i-th word read through r1, b[j] the i-th
   * word read through r2, out[k] the k-th word written through r0.
   * Pass 1 builds the partial product a[0..4] * b[3..7] in out[3..12].
   * Pass 2 rewinds all three pointers and adds every remaining a[i]*b[j],
   * folding the pass-1 words back in from memory with ldr.
   * Each output word (column) is summed in a rotating three-register
   * accumulator (low, high, carry) taken from r9-r12/r14; the two upper
   * registers carry over into the next column.
   * On exit r0 has advanced by 64 bytes and r1, r2 by 32 bytes each. r3 is
   * preserved by the push/pop pair; r4-r12, r14 and the flags are overwritten.
   * NOTE(review): the enclosing asm statement and its operand/clobber lists are
   * not visible here -- confirm they agree with this register usage. */
  1016. #define FAST_MULT_ASM_8 \
  /* r3 is used as a data register below; it is restored by the final pop */ \
  1017. "push {r3} \n\t" \
  /* pass 1a: a[0..1] * b[6..7] -> out[6..9] */ \
  1018. "add r0, 24 \n\t" \
  1019. "add r2, 24 \n\t" \
  1020. "ldmia r1!, {r3,r4} \n\t" \
  1021. "ldmia r2!, {r6,r7} \n\t" \
  1022. \
  /* out[6] = low(a0*b6), high word stays in r12 */ \
  1023. "umull r11, r12, r3, r6 \n\t" \
  1024. "stmia r0!, {r11} \n\t" \
  1025. \
  /* out[7] = a0*b7 + a1*b6 */ \
  1026. "mov r10, #0 \n\t" \
  1027. "umull r11, r9, r3, r7 \n\t" \
  1028. "adds r12, r12, r11 \n\t" \
  1029. "adc r9, r9, #0 \n\t" \
  1030. "umull r11, r14, r4, r6 \n\t" \
  1031. "adds r12, r12, r11 \n\t" \
  1032. "adcs r9, r9, r14 \n\t" \
  1033. "adc r10, r10, #0 \n\t" \
  1034. "stmia r0!, {r12} \n\t" \
  1035. \
  /* out[8..9] = a1*b7 plus the carried-in words */ \
  1036. "umull r12, r14, r4, r7 \n\t" \
  1037. "adds r9, r9, r12 \n\t" \
  1038. "adc r10, r10, r14 \n\t" \
  1039. "stmia r0!, {r9, r10} \n\t" \
  1040. \
  /* pass 1b: rewind r0 to out[3] and r2 to b[3]; r6..r8 = b3..b5, r5 = a2 */ \
  1041. "sub r0, 28 \n\t" \
  1042. "sub r2, 20 \n\t" \
  1043. "ldmia r2!, {r6,r7,r8} \n\t" \
  1044. "ldmia r1!, {r5} \n\t" \
  1045. \
  /* out[3] = low(a0*b3) */ \
  1046. "umull r11, r12, r3, r6 \n\t" \
  1047. "stmia r0!, {r11} \n\t" \
  1048. \
  /* out[4] = a0*b4 + a1*b3 */ \
  1049. "mov r10, #0 \n\t" \
  1050. "umull r11, r9, r3, r7 \n\t" \
  1051. "adds r12, r12, r11 \n\t" \
  1052. "adc r9, r9, #0 \n\t" \
  1053. "umull r11, r14, r4, r6 \n\t" \
  1054. "adds r12, r12, r11 \n\t" \
  1055. "adcs r9, r9, r14 \n\t" \
  1056. "adc r10, r10, #0 \n\t" \
  1057. "stmia r0!, {r12} \n\t" \
  1058. \
  /* out[5] = a0*b5 + a1*b4 + a2*b3 */ \
  1059. "mov r11, #0 \n\t" \
  1060. "umull r12, r14, r3, r8 \n\t" \
  1061. "adds r9, r9, r12 \n\t" \
  1062. "adcs r10, r10, r14 \n\t" \
  1063. "adc r11, r11, #0 \n\t" \
  1064. "umull r12, r14, r4, r7 \n\t" \
  1065. "adds r9, r9, r12 \n\t" \
  1066. "adcs r10, r10, r14 \n\t" \
  1067. "adc r11, r11, #0 \n\t" \
  1068. "umull r12, r14, r5, r6 \n\t" \
  1069. "adds r9, r9, r12 \n\t" \
  1070. "adcs r10, r10, r14 \n\t" \
  1071. "adc r11, r11, #0 \n\t" \
  1072. "stmia r0!, {r9} \n\t" \
  1073. \
  /* r3 = a3; out[6] += a1*b5 + a2*b4 + a3*b3 (old out[6] read back with ldr) */ \
  1074. "ldmia r1!, {r3} \n\t" \
  1075. "mov r12, #0 \n\t" \
  1076. "umull r14, r9, r4, r8 \n\t" \
  1077. "adds r10, r10, r14 \n\t" \
  1078. "adcs r11, r11, r9 \n\t" \
  1079. "adc r12, r12, #0 \n\t" \
  1080. "umull r14, r9, r5, r7 \n\t" \
  1081. "adds r10, r10, r14 \n\t" \
  1082. "adcs r11, r11, r9 \n\t" \
  1083. "adc r12, r12, #0 \n\t" \
  1084. "umull r14, r9, r3, r6 \n\t" \
  1085. "adds r10, r10, r14 \n\t" \
  1086. "adcs r11, r11, r9 \n\t" \
  1087. "adc r12, r12, #0 \n\t" \
  1088. "ldr r14, [r0] \n\t" \
  1089. "adds r10, r10, r14 \n\t" \
  1090. "adcs r11, r11, #0 \n\t" \
  1091. "adc r12, r12, #0 \n\t" \
  1092. "stmia r0!, {r10} \n\t" \
  1093. \
  /* r4 = a4; out[7] += a2*b5 + a3*b4 + a4*b3 */ \
  1094. "ldmia r1!, {r4} \n\t" \
  1095. "mov r14, #0 \n\t" \
  1096. "umull r9, r10, r5, r8 \n\t" \
  1097. "adds r11, r11, r9 \n\t" \
  1098. "adcs r12, r12, r10 \n\t" \
  1099. "adc r14, r14, #0 \n\t" \
  1100. "umull r9, r10, r3, r7 \n\t" \
  1101. "adds r11, r11, r9 \n\t" \
  1102. "adcs r12, r12, r10 \n\t" \
  1103. "adc r14, r14, #0 \n\t" \
  1104. "umull r9, r10, r4, r6 \n\t" \
  1105. "adds r11, r11, r9 \n\t" \
  1106. "adcs r12, r12, r10 \n\t" \
  1107. "adc r14, r14, #0 \n\t" \
  1108. "ldr r9, [r0] \n\t" \
  1109. "adds r11, r11, r9 \n\t" \
  1110. "adcs r12, r12, #0 \n\t" \
  1111. "adc r14, r14, #0 \n\t" \
  1112. "stmia r0!, {r11} \n\t" \
  1113. \
  /* r6 = b6; out[8] += a2*b6 + a3*b5 + a4*b4 */ \
  1114. "ldmia r2!, {r6} \n\t" \
  1115. "mov r9, #0 \n\t" \
  1116. "umull r10, r11, r5, r6 \n\t" \
  1117. "adds r12, r12, r10 \n\t" \
  1118. "adcs r14, r14, r11 \n\t" \
  1119. "adc r9, r9, #0 \n\t" \
  1120. "umull r10, r11, r3, r8 \n\t" \
  1121. "adds r12, r12, r10 \n\t" \
  1122. "adcs r14, r14, r11 \n\t" \
  1123. "adc r9, r9, #0 \n\t" \
  1124. "umull r10, r11, r4, r7 \n\t" \
  1125. "adds r12, r12, r10 \n\t" \
  1126. "adcs r14, r14, r11 \n\t" \
  1127. "adc r9, r9, #0 \n\t" \
  1128. "ldr r10, [r0] \n\t" \
  1129. "adds r12, r12, r10 \n\t" \
  1130. "adcs r14, r14, #0 \n\t" \
  1131. "adc r9, r9, #0 \n\t" \
  1132. "stmia r0!, {r12} \n\t" \
  1133. \
  /* r7 = b7; out[9] += a2*b7 + a3*b6 + a4*b5 */ \
  1134. "ldmia r2!, {r7} \n\t" \
  1135. "mov r10, #0 \n\t" \
  1136. "umull r11, r12, r5, r7 \n\t" \
  1137. "adds r14, r14, r11 \n\t" \
  1138. "adcs r9, r9, r12 \n\t" \
  1139. "adc r10, r10, #0 \n\t" \
  1140. "umull r11, r12, r3, r6 \n\t" \
  1141. "adds r14, r14, r11 \n\t" \
  1142. "adcs r9, r9, r12 \n\t" \
  1143. "adc r10, r10, #0 \n\t" \
  1144. "umull r11, r12, r4, r8 \n\t" \
  1145. "adds r14, r14, r11 \n\t" \
  1146. "adcs r9, r9, r12 \n\t" \
  1147. "adc r10, r10, #0 \n\t" \
  1148. "ldr r11, [r0] \n\t" \
  1149. "adds r14, r14, r11 \n\t" \
  1150. "adcs r9, r9, #0 \n\t" \
  1151. "adc r10, r10, #0 \n\t" \
  1152. "stmia r0!, {r14} \n\t" \
  1153. \
  /* out[10] = a3*b7 + a4*b6 */ \
  1154. "mov r11, #0 \n\t" \
  1155. "umull r12, r14, r3, r7 \n\t" \
  1156. "adds r9, r9, r12 \n\t" \
  1157. "adcs r10, r10, r14 \n\t" \
  1158. "adc r11, r11, #0 \n\t" \
  1159. "umull r12, r14, r4, r6 \n\t" \
  1160. "adds r9, r9, r12 \n\t" \
  1161. "adcs r10, r10, r14 \n\t" \
  1162. "adc r11, r11, #0 \n\t" \
  1163. "stmia r0!, {r9} \n\t" \
  1164. \
  /* out[11..12] = a4*b7 plus the carried-in words */ \
  1165. "umull r14, r9, r4, r7 \n\t" \
  1166. "adds r10, r10, r14 \n\t" \
  1167. "adc r11, r11, r9 \n\t" \
  1168. "stmia r0!, {r10, r11} \n\t" \
  1169. \
  /* pass 2: rewind r0, r1, r2 to word 0; r3..r5 = a0..a2, r6..r8 = b0..b2 */ \
  1170. "sub r0, 52 \n\t" \
  1171. "sub r1, 20 \n\t" \
  1172. "sub r2, 32 \n\t" \
  1173. "ldmia r1!, {r3,r4,r5} \n\t" \
  1174. "ldmia r2!, {r6,r7,r8} \n\t" \
  1175. \
  /* out[0] = low(a0*b0) */ \
  1176. "umull r11, r12, r3, r6 \n\t" \
  1177. "stmia r0!, {r11} \n\t" \
  1178. \
  /* out[1] = a0*b1 + a1*b0 */ \
  1179. "mov r10, #0 \n\t" \
  1180. "umull r11, r9, r3, r7 \n\t" \
  1181. "adds r12, r12, r11 \n\t" \
  1182. "adc r9, r9, #0 \n\t" \
  1183. "umull r11, r14, r4, r6 \n\t" \
  1184. "adds r12, r12, r11 \n\t" \
  1185. "adcs r9, r9, r14 \n\t" \
  1186. "adc r10, r10, #0 \n\t" \
  1187. "stmia r0!, {r12} \n\t" \
  1188. \
  /* out[2] = a0*b2 + a1*b1 + a2*b0 */ \
  1189. "mov r11, #0 \n\t" \
  1190. "umull r12, r14, r3, r8 \n\t" \
  1191. "adds r9, r9, r12 \n\t" \
  1192. "adcs r10, r10, r14 \n\t" \
  1193. "adc r11, r11, #0 \n\t" \
  1194. "umull r12, r14, r4, r7 \n\t" \
  1195. "adds r9, r9, r12 \n\t" \
  1196. "adcs r10, r10, r14 \n\t" \
  1197. "adc r11, r11, #0 \n\t" \
  1198. "umull r12, r14, r5, r6 \n\t" \
  1199. "adds r9, r9, r12 \n\t" \
  1200. "adcs r10, r10, r14 \n\t" \
  1201. "adc r11, r11, #0 \n\t" \
  1202. "stmia r0!, {r9} \n\t" \
  1203. \
  /* r3 = a3; out[3] += a1*b2 + a2*b1 + a3*b0 */ \
  1204. "ldmia r1!, {r3} \n\t" \
  1205. "mov r12, #0 \n\t" \
  1206. "umull r14, r9, r4, r8 \n\t" \
  1207. "adds r10, r10, r14 \n\t" \
  1208. "adcs r11, r11, r9 \n\t" \
  1209. "adc r12, r12, #0 \n\t" \
  1210. "umull r14, r9, r5, r7 \n\t" \
  1211. "adds r10, r10, r14 \n\t" \
  1212. "adcs r11, r11, r9 \n\t" \
  1213. "adc r12, r12, #0 \n\t" \
  1214. "umull r14, r9, r3, r6 \n\t" \
  1215. "adds r10, r10, r14 \n\t" \
  1216. "adcs r11, r11, r9 \n\t" \
  1217. "adc r12, r12, #0 \n\t" \
  1218. "ldr r14, [r0] \n\t" \
  1219. "adds r10, r10, r14 \n\t" \
  1220. "adcs r11, r11, #0 \n\t" \
  1221. "adc r12, r12, #0 \n\t" \
  1222. "stmia r0!, {r10} \n\t" \
  1223. \
  /* r4 = a4; out[4] += a2*b2 + a3*b1 + a4*b0 */ \
  1224. "ldmia r1!, {r4} \n\t" \
  1225. "mov r14, #0 \n\t" \
  1226. "umull r9, r10, r5, r8 \n\t" \
  1227. "adds r11, r11, r9 \n\t" \
  1228. "adcs r12, r12, r10 \n\t" \
  1229. "adc r14, r14, #0 \n\t" \
  1230. "umull r9, r10, r3, r7 \n\t" \
  1231. "adds r11, r11, r9 \n\t" \
  1232. "adcs r12, r12, r10 \n\t" \
  1233. "adc r14, r14, #0 \n\t" \
  1234. "umull r9, r10, r4, r6 \n\t" \
  1235. "adds r11, r11, r9 \n\t" \
  1236. "adcs r12, r12, r10 \n\t" \
  1237. "adc r14, r14, #0 \n\t" \
  1238. "ldr r9, [r0] \n\t" \
  1239. "adds r11, r11, r9 \n\t" \
  1240. "adcs r12, r12, #0 \n\t" \
  1241. "adc r14, r14, #0 \n\t" \
  1242. "stmia r0!, {r11} \n\t" \
  1243. \
  /* r5 = a5; out[5] += a3*b2 + a4*b1 + a5*b0 */ \
  1244. "ldmia r1!, {r5} \n\t" \
  1245. "mov r9, #0 \n\t" \
  1246. "umull r10, r11, r3, r8 \n\t" \
  1247. "adds r12, r12, r10 \n\t" \
  1248. "adcs r14, r14, r11 \n\t" \
  1249. "adc r9, r9, #0 \n\t" \
  1250. "umull r10, r11, r4, r7 \n\t" \
  1251. "adds r12, r12, r10 \n\t" \
  1252. "adcs r14, r14, r11 \n\t" \
  1253. "adc r9, r9, #0 \n\t" \
  1254. "umull r10, r11, r5, r6 \n\t" \
  1255. "adds r12, r12, r10 \n\t" \
  1256. "adcs r14, r14, r11 \n\t" \
  1257. "adc r9, r9, #0 \n\t" \
  1258. "ldr r10, [r0] \n\t" \
  1259. "adds r12, r12, r10 \n\t" \
  1260. "adcs r14, r14, #0 \n\t" \
  1261. "adc r9, r9, #0 \n\t" \
  1262. "stmia r0!, {r12} \n\t" \
  1263. \
  /* r3 = a6; out[6] += a4*b2 + a5*b1 + a6*b0 */ \
  1264. "ldmia r1!, {r3} \n\t" \
  1265. "mov r10, #0 \n\t" \
  1266. "umull r11, r12, r4, r8 \n\t" \
  1267. "adds r14, r14, r11 \n\t" \
  1268. "adcs r9, r9, r12 \n\t" \
  1269. "adc r10, r10, #0 \n\t" \
  1270. "umull r11, r12, r5, r7 \n\t" \
  1271. "adds r14, r14, r11 \n\t" \
  1272. "adcs r9, r9, r12 \n\t" \
  1273. "adc r10, r10, #0 \n\t" \
  1274. "umull r11, r12, r3, r6 \n\t" \
  1275. "adds r14, r14, r11 \n\t" \
  1276. "adcs r9, r9, r12 \n\t" \
  1277. "adc r10, r10, #0 \n\t" \
  1278. "ldr r11, [r0] \n\t" \
  1279. "adds r14, r14, r11 \n\t" \
  1280. "adcs r9, r9, #0 \n\t" \
  1281. "adc r10, r10, #0 \n\t" \
  1282. "stmia r0!, {r14} \n\t" \
  1283. \
  /* r4 = a7; out[7] += a5*b2 + a6*b1 + a7*b0 */ \
  1284. "ldmia r1!, {r4} \n\t" \
  1285. "mov r11, #0 \n\t" \
  1286. "umull r12, r14, r5, r8 \n\t" \
  1287. "adds r9, r9, r12 \n\t" \
  1288. "adcs r10, r10, r14 \n\t" \
  1289. "adc r11, r11, #0 \n\t" \
  1290. "umull r12, r14, r3, r7 \n\t" \
  1291. "adds r9, r9, r12 \n\t" \
  1292. "adcs r10, r10, r14 \n\t" \
  1293. "adc r11, r11, #0 \n\t" \
  1294. "umull r12, r14, r4, r6 \n\t" \
  1295. "adds r9, r9, r12 \n\t" \
  1296. "adcs r10, r10, r14 \n\t" \
  1297. "adc r11, r11, #0 \n\t" \
  1298. "ldr r12, [r0] \n\t" \
  1299. "adds r9, r9, r12 \n\t" \
  1300. "adcs r10, r10, #0 \n\t" \
  1301. "adc r11, r11, #0 \n\t" \
  1302. "stmia r0!, {r9} \n\t" \
  1303. \
  /* r6 = b3; out[8] += a5*b3 + a6*b2 + a7*b1 */ \
  1304. "ldmia r2!, {r6} \n\t" \
  1305. "mov r12, #0 \n\t" \
  1306. "umull r14, r9, r5, r6 \n\t" \
  1307. "adds r10, r10, r14 \n\t" \
  1308. "adcs r11, r11, r9 \n\t" \
  1309. "adc r12, r12, #0 \n\t" \
  1310. "umull r14, r9, r3, r8 \n\t" \
  1311. "adds r10, r10, r14 \n\t" \
  1312. "adcs r11, r11, r9 \n\t" \
  1313. "adc r12, r12, #0 \n\t" \
  1314. "umull r14, r9, r4, r7 \n\t" \
  1315. "adds r10, r10, r14 \n\t" \
  1316. "adcs r11, r11, r9 \n\t" \
  1317. "adc r12, r12, #0 \n\t" \
  1318. "ldr r14, [r0] \n\t" \
  1319. "adds r10, r10, r14 \n\t" \
  1320. "adcs r11, r11, #0 \n\t" \
  1321. "adc r12, r12, #0 \n\t" \
  1322. "stmia r0!, {r10} \n\t" \
  1323. \
  /* r7 = b4; out[9] += a5*b4 + a6*b3 + a7*b2 */ \
  1324. "ldmia r2!, {r7} \n\t" \
  1325. "mov r14, #0 \n\t" \
  1326. "umull r9, r10, r5, r7 \n\t" \
  1327. "adds r11, r11, r9 \n\t" \
  1328. "adcs r12, r12, r10 \n\t" \
  1329. "adc r14, r14, #0 \n\t" \
  1330. "umull r9, r10, r3, r6 \n\t" \
  1331. "adds r11, r11, r9 \n\t" \
  1332. "adcs r12, r12, r10 \n\t" \
  1333. "adc r14, r14, #0 \n\t" \
  1334. "umull r9, r10, r4, r8 \n\t" \
  1335. "adds r11, r11, r9 \n\t" \
  1336. "adcs r12, r12, r10 \n\t" \
  1337. "adc r14, r14, #0 \n\t" \
  1338. "ldr r9, [r0] \n\t" \
  1339. "adds r11, r11, r9 \n\t" \
  1340. "adcs r12, r12, #0 \n\t" \
  1341. "adc r14, r14, #0 \n\t" \
  1342. "stmia r0!, {r11} \n\t" \
  1343. \
  /* r8 = b5; out[10] += a5*b5 + a6*b4 + a7*b3 */ \
  1344. "ldmia r2!, {r8} \n\t" \
  1345. "mov r9, #0 \n\t" \
  1346. "umull r10, r11, r5, r8 \n\t" \
  1347. "adds r12, r12, r10 \n\t" \
  1348. "adcs r14, r14, r11 \n\t" \
  1349. "adc r9, r9, #0 \n\t" \
  1350. "umull r10, r11, r3, r7 \n\t" \
  1351. "adds r12, r12, r10 \n\t" \
  1352. "adcs r14, r14, r11 \n\t" \
  1353. "adc r9, r9, #0 \n\t" \
  1354. "umull r10, r11, r4, r6 \n\t" \
  1355. "adds r12, r12, r10 \n\t" \
  1356. "adcs r14, r14, r11 \n\t" \
  1357. "adc r9, r9, #0 \n\t" \
  1358. "ldr r10, [r0] \n\t" \
  1359. "adds r12, r12, r10 \n\t" \
  1360. "adcs r14, r14, #0 \n\t" \
  1361. "adc r9, r9, #0 \n\t" \
  1362. "stmia r0!, {r12} \n\t" \
  1363. \
  /* r6 = b6; out[11] += a5*b6 + a6*b5 + a7*b4 */ \
  1364. "ldmia r2!, {r6} \n\t" \
  1365. "mov r10, #0 \n\t" \
  1366. "umull r11, r12, r5, r6 \n\t" \
  1367. "adds r14, r14, r11 \n\t" \
  1368. "adcs r9, r9, r12 \n\t" \
  1369. "adc r10, r10, #0 \n\t" \
  1370. "umull r11, r12, r3, r8 \n\t" \
  1371. "adds r14, r14, r11 \n\t" \
  1372. "adcs r9, r9, r12 \n\t" \
  1373. "adc r10, r10, #0 \n\t" \
  1374. "umull r11, r12, r4, r7 \n\t" \
  1375. "adds r14, r14, r11 \n\t" \
  1376. "adcs r9, r9, r12 \n\t" \
  1377. "adc r10, r10, #0 \n\t" \
  1378. "ldr r11, [r0] \n\t" \
  1379. "adds r14, r14, r11 \n\t" \
  1380. "adcs r9, r9, #0 \n\t" \
  1381. "adc r10, r10, #0 \n\t" \
  1382. "stmia r0!, {r14} \n\t" \
  1383. \
  /* r7 = b7; out[12] += a5*b7 + a6*b6 + a7*b5 */ \
  1384. "ldmia r2!, {r7} \n\t" \
  1385. "mov r11, #0 \n\t" \
  1386. "umull r12, r14, r5, r7 \n\t" \
  1387. "adds r9, r9, r12 \n\t" \
  1388. "adcs r10, r10, r14 \n\t" \
  1389. "adc r11, r11, #0 \n\t" \
  1390. "umull r12, r14, r3, r6 \n\t" \
  1391. "adds r9, r9, r12 \n\t" \
  1392. "adcs r10, r10, r14 \n\t" \
  1393. "adc r11, r11, #0 \n\t" \
  1394. "umull r12, r14, r4, r8 \n\t" \
  1395. "adds r9, r9, r12 \n\t" \
  1396. "adcs r10, r10, r14 \n\t" \
  1397. "adc r11, r11, #0 \n\t" \
  1398. "ldr r12, [r0] \n\t" \
  1399. "adds r9, r9, r12 \n\t" \
  1400. "adcs r10, r10, #0 \n\t" \
  1401. "adc r11, r11, #0 \n\t" \
  1402. "stmia r0!, {r9} \n\t" \
  1403. \
  /* out[13] = a6*b7 + a7*b6 */ \
  1404. "mov r12, #0 \n\t" \
  1405. "umull r14, r9, r3, r7 \n\t" \
  1406. "adds r10, r10, r14 \n\t" \
  1407. "adcs r11, r11, r9 \n\t" \
  1408. "adc r12, r12, #0 \n\t" \
  1409. "umull r14, r9, r4, r6 \n\t" \
  1410. "adds r10, r10, r14 \n\t" \
  1411. "adcs r11, r11, r9 \n\t" \
  1412. "adc r12, r12, #0 \n\t" \
  1413. "stmia r0!, {r10} \n\t" \
  1414. \
  /* out[14..15] = a7*b7 plus the carried-in words; then restore r3 */ \
  1415. "umull r9, r10, r4, r7 \n\t" \
  1416. "adds r11, r11, r9 \n\t" \
  1417. "adc r12, r12, r10 \n\t" \
  1418. "stmia r0!, {r11, r12} \n\t" \
  1419. "pop {r3} \n\t"
  /* FAST_SQUARE_ASM_5: straight-line square of a 5-word value (32-bit words).
   * a[0..4] are loaded through r1 into r2..r6 and the 10 result words out[0..9]
   * are written through r0. Each cross product a[i]*a[j] (i != j) is computed
   * once and doubled; the squares a[i]*a[i] are added undoubled.
   * r2 and then r1 (already advanced by 20 bytes) are pushed because both are
   * reused below (r2 as a[0], r1 as scratch); the final pop restores them.
   * On exit r0 has advanced by 40 bytes and r1 by 20 bytes; r3-r6, r8-r12, r14
   * and the flags are overwritten.
   * NOTE(review): the enclosing asm statement's clobber list is not visible
   * here -- confirm it covers these registers. */
  1420. #define FAST_SQUARE_ASM_5 \
  1421. "push {r2} \n\t" \
  1422. "ldmia r1!, {r2,r3,r4,r5,r6} \n\t" \
  1423. "push {r1} \n\t" \
  1424. \
  /* out[0] = low(a0*a0), high word stays in r12 */ \
  1425. "umull r11, r12, r2, r2 \n\t" \
  1426. "stmia r0!, {r11} \n\t" \
  1427. \
  /* out[1] = 2*a0*a1: the product is added twice instead of being shifted */ \
  1428. "mov r9, #0 \n\t" \
  1429. "umull r10, r11, r2, r3 \n\t" \
  1430. "adds r12, r12, r10 \n\t" \
  1431. "adcs r8, r11, #0 \n\t" \
  1432. "adc r9, r9, #0 \n\t" \
  1433. "adds r12, r12, r10 \n\t" \
  1434. "adcs r8, r8, r11 \n\t" \
  1435. "adc r9, r9, #0 \n\t" \
  1436. "stmia r0!, {r12} \n\t" \
  1437. \
  /* out[2] = 2*a0*a2 + a1*a1 */ \
  1438. "mov r10, #0 \n\t" \
  1439. "umull r11, r12, r2, r4 \n\t" \
  1440. "adds r11, r11, r11 \n\t" \
  1441. "adcs r12, r12, r12 \n\t" \
  1442. "adc r10, r10, #0 \n\t" \
  1443. "adds r8, r8, r11 \n\t" \
  1444. "adcs r9, r9, r12 \n\t" \
  1445. "adc r10, r10, #0 \n\t" \
  1446. "umull r11, r12, r3, r3 \n\t" \
  1447. "adds r8, r8, r11 \n\t" \
  1448. "adcs r9, r9, r12 \n\t" \
  1449. "adc r10, r10, #0 \n\t" \
  1450. "stmia r0!, {r8} \n\t" \
  1451. \
  /* out[3] = 2*(a0*a3 + a1*a2); r1 and r14 are scratch from here on */ \
  1452. "mov r12, #0 \n\t" \
  1453. "umull r8, r11, r2, r5 \n\t" \
  1454. "umull r1, r14, r3, r4 \n\t" \
  1455. "adds r8, r8, r1 \n\t" \
  1456. "adcs r11, r11, r14 \n\t" \
  1457. "adc r12, r12, #0 \n\t" \
  1458. "adds r8, r8, r8 \n\t" \
  1459. "adcs r11, r11, r11 \n\t" \
  1460. "adc r12, r12, r12 \n\t" \
  1461. "adds r8, r8, r9 \n\t" \
  1462. "adcs r11, r11, r10 \n\t" \
  1463. "adc r12, r12, #0 \n\t" \
  1464. "stmia r0!, {r8} \n\t" \
  1465. \
  /* out[4] = 2*(a0*a4 + a1*a3) + a2*a2 */ \
  1466. "mov r10, #0 \n\t" \
  1467. "umull r8, r9, r2, r6 \n\t" \
  1468. "umull r1, r14, r3, r5 \n\t" \
  1469. "adds r8, r8, r1 \n\t" \
  1470. "adcs r9, r9, r14 \n\t" \
  1471. "adc r10, r10, #0 \n\t" \
  1472. "adds r8, r8, r8 \n\t" \
  1473. "adcs r9, r9, r9 \n\t" \
  1474. "adc r10, r10, r10 \n\t" \
  1475. "umull r1, r14, r4, r4 \n\t" \
  1476. "adds r8, r8, r1 \n\t" \
  1477. "adcs r9, r9, r14 \n\t" \
  1478. "adc r10, r10, #0 \n\t" \
  1479. "adds r8, r8, r11 \n\t" \
  1480. "adcs r9, r9, r12 \n\t" \
  1481. "adc r10, r10, #0 \n\t" \
  1482. "stmia r0!, {r8} \n\t" \
  1483. \
  /* out[5] = 2*(a1*a4 + a2*a3) */ \
  1484. "mov r12, #0 \n\t" \
  1485. "umull r8, r11, r3, r6 \n\t" \
  1486. "umull r1, r14, r4, r5 \n\t" \
  1487. "adds r8, r8, r1 \n\t" \
  1488. "adcs r11, r11, r14 \n\t" \
  1489. "adc r12, r12, #0 \n\t" \
  1490. "adds r8, r8, r8 \n\t" \
  1491. "adcs r11, r11, r11 \n\t" \
  1492. "adc r12, r12, r12 \n\t" \
  1493. "adds r8, r8, r9 \n\t" \
  1494. "adcs r11, r11, r10 \n\t" \
  1495. "adc r12, r12, #0 \n\t" \
  1496. "stmia r0!, {r8} \n\t" \
  1497. \
  /* out[6] = 2*a2*a4 + a3*a3 */ \
  1498. "mov r8, #0 \n\t" \
  1499. "umull r1, r10, r4, r6 \n\t" \
  1500. "adds r1, r1, r1 \n\t" \
  1501. "adcs r10, r10, r10 \n\t" \
  1502. "adc r8, r8, #0 \n\t" \
  1503. "adds r11, r11, r1 \n\t" \
  1504. "adcs r12, r12, r10 \n\t" \
  1505. "adc r8, r8, #0 \n\t" \
  1506. "umull r1, r10, r5, r5 \n\t" \
  1507. "adds r11, r11, r1 \n\t" \
  1508. "adcs r12, r12, r10 \n\t" \
  1509. "adc r8, r8, #0 \n\t" \
  1510. "stmia r0!, {r11} \n\t" \
  1511. \
  /* out[7] = 2*a3*a4 */ \
  1512. "mov r11, #0 \n\t" \
  1513. "umull r1, r10, r5, r6 \n\t" \
  1514. "adds r1, r1, r1 \n\t" \
  1515. "adcs r10, r10, r10 \n\t" \
  1516. "adc r11, r11, #0 \n\t" \
  1517. "adds r12, r12, r1 \n\t" \
  1518. "adcs r8, r8, r10 \n\t" \
  1519. "adc r11, r11, #0 \n\t" \
  1520. "stmia r0!, {r12} \n\t" \
  1521. \
  /* out[8..9] = a4*a4 plus the carried-in words; then restore r1 and r2 */ \
  1522. "umull r1, r10, r6, r6 \n\t" \
  1523. "adds r8, r8, r1 \n\t" \
  1524. "adcs r11, r11, r10 \n\t" \
  1525. "stmia r0!, {r8, r11} \n\t" \
  1526. "pop {r1, r2} \n\t"
  /* FAST_SQUARE_ASM_5_TO_6: extends an existing 5-word square to a 6-word square.
   * If r2 == 5 it branches forward to local label 1, which is defined outside
   * this macro. Otherwise it steps r0 and r1 back by 20 bytes, loads a[0..5]
   * through r1, and adds 2*a5*(a0..a4) + a5*a5 (shifted up by five words) into
   * the result, reading five existing words through r0 and storing seven.
   * Net pointer movement on the non-branch path: r0 +8 bytes, r1 +4 bytes.
   * r3-r11, r14 and the flags are overwritten.
   * NOTE(review): presumably r2 holds the operand word count and r0/r1 are as
   * FAST_SQUARE_ASM_5 leaves them -- confirm against the enclosing function. */
  1527. #define FAST_SQUARE_ASM_5_TO_6 \
  1528. "cmp r2, #5 \n\t" \
  1529. "beq 1f \n\t" \
  1530. \
  1531. "sub r0, #20 \n\t" \
  1532. "sub r1, #20 \n\t" \
  1533. \
  1534. /* Do off-center multiplication */ \
  /* r6..r10 = a0..a4, r11 = a5; r3..r8 becomes the 6-word product a5*(a0..a4). */ \
  /* umull leaves the flags alone, so the adds/adcs carry chain survives across it. */ \
  1535. "ldmia r1!, {r6,r7,r8,r9,r10,r11} \n\t" \
  1536. "umull r3, r4, r6, r11 \n\t" \
  1537. "umull r6, r5, r7, r11 \n\t" \
  1538. "adds r4, r4, r6 \n\t" \
  1539. "umull r7, r6, r8, r11 \n\t" \
  1540. "adcs r5, r5, r7 \n\t" \
  1541. "umull r8, r7, r9, r11 \n\t" \
  1542. "adcs r6, r6, r8 \n\t" \
  1543. "umull r9, r8, r10, r11 \n\t" \
  1544. "adcs r7, r7, r9 \n\t" \
  1545. "adcs r8, r8, #0 \n\t" \
  1546. \
  1547. /* Multiply by 2 */ \
  /* r9 receives the bit shifted out of the top word */ \
  1548. "mov r9, #0 \n\t" \
  1549. "adds r3, r3, r3 \n\t" \
  1550. "adcs r4, r4, r4 \n\t" \
  1551. "adcs r5, r5, r5 \n\t" \
  1552. "adcs r6, r6, r6 \n\t" \
  1553. "adcs r7, r7, r7 \n\t" \
  1554. "adcs r8, r8, r8 \n\t" \
  1555. "adcs r9, r9, #0 \n\t" \
  1556. \
  1557. /* Add into previous */ \
  /* five existing result words are read through r0; the carry ripples into r8, r9 */ \
  1558. "ldr r14, [r0], #4 \n\t" \
  1559. "adds r3, r3, r14 \n\t" \
  1560. "ldr r14, [r0], #4 \n\t" \
  1561. "adcs r4, r4, r14 \n\t" \
  1562. "ldr r14, [r0], #4 \n\t" \
  1563. "adcs r5, r5, r14 \n\t" \
  1564. "ldr r14, [r0], #4 \n\t" \
  1565. "adcs r6, r6, r14 \n\t" \
  1566. "ldr r14, [r0], #4 \n\t" \
  1567. "adcs r7, r7, r14 \n\t" \
  1568. "adcs r8, r8, #0 \n\t" \
  1569. "adcs r9, r9, #0 \n\t" \
  1570. "sub r0, #20 \n\t" \
  1571. \
  1572. /* Perform center multiplication */ \
  /* r9:r8 += a5*a5, then seven words are written back through r0 */ \
  1573. "umlal r8, r9, r11, r11 \n\t" \
  1574. "stmia r0!, {r3,r4,r5,r6,r7,r8,r9} \n\t"
  /* FAST_SQUARE_ASM_6: straight-line square of a 6-word value (32-bit words).
   * a[0..5] are loaded through r1 into r2..r7 and the 12 result words
   * out[0..11] are written through r0. Each cross product a[i]*a[j] (i != j)
   * is computed once and doubled; the squares a[i]*a[i] are added undoubled.
   * r2 and then r1 (already advanced by 24 bytes) are pushed because both are
   * reused below (r2 as a[0], r1 as scratch); the final pop restores them.
   * On exit r0 has advanced by 48 bytes and r1 by 24 bytes; r3-r12, r14 and
   * the flags are overwritten.
   * NOTE(review): the enclosing asm statement's clobber list is not visible
   * here -- confirm it covers these registers. */
  1575. #define FAST_SQUARE_ASM_6 \
  1576. "push {r2} \n\t" \
  1577. "ldmia r1!, {r2,r3,r4,r5,r6,r7} \n\t" \
  1578. "push {r1} \n\t" \
  1579. \
  /* out[0] = low(a0*a0), high word stays in r12 */ \
  1580. "umull r11, r12, r2, r2 \n\t" \
  1581. "stmia r0!, {r11} \n\t" \
  1582. \
  /* out[1] = 2*a0*a1: the product is added twice instead of being shifted */ \
  1583. "mov r9, #0 \n\t" \
  1584. "umull r10, r11, r2, r3 \n\t" \
  1585. "adds r12, r12, r10 \n\t" \
  1586. "adcs r8, r11, #0 \n\t" \
  1587. "adc r9, r9, #0 \n\t" \
  1588. "adds r12, r12, r10 \n\t" \
  1589. "adcs r8, r8, r11 \n\t" \
  1590. "adc r9, r9, #0 \n\t" \
  1591. "stmia r0!, {r12} \n\t" \
  1592. \
  /* out[2] = 2*a0*a2 + a1*a1 */ \
  1593. "mov r10, #0 \n\t" \
  1594. "umull r11, r12, r2, r4 \n\t" \
  1595. "adds r11, r11, r11 \n\t" \
  1596. "adcs r12, r12, r12 \n\t" \
  1597. "adc r10, r10, #0 \n\t" \
  1598. "adds r8, r8, r11 \n\t" \
  1599. "adcs r9, r9, r12 \n\t" \
  1600. "adc r10, r10, #0 \n\t" \
  1601. "umull r11, r12, r3, r3 \n\t" \
  1602. "adds r8, r8, r11 \n\t" \
  1603. "adcs r9, r9, r12 \n\t" \
  1604. "adc r10, r10, #0 \n\t" \
  1605. "stmia r0!, {r8} \n\t" \
  1606. \
  /* out[3] = 2*(a0*a3 + a1*a2); r1 and r14 are scratch from here on */ \
  1607. "mov r12, #0 \n\t" \
  1608. "umull r8, r11, r2, r5 \n\t" \
  1609. "umull r1, r14, r3, r4 \n\t" \
  1610. "adds r8, r8, r1 \n\t" \
  1611. "adcs r11, r11, r14 \n\t" \
  1612. "adc r12, r12, #0 \n\t" \
  1613. "adds r8, r8, r8 \n\t" \
  1614. "adcs r11, r11, r11 \n\t" \
  1615. "adc r12, r12, r12 \n\t" \
  1616. "adds r8, r8, r9 \n\t" \
  1617. "adcs r11, r11, r10 \n\t" \
  1618. "adc r12, r12, #0 \n\t" \
  1619. "stmia r0!, {r8} \n\t" \
  1620. \
  /* out[4] = 2*(a0*a4 + a1*a3) + a2*a2 */ \
  1621. "mov r10, #0 \n\t" \
  1622. "umull r8, r9, r2, r6 \n\t" \
  1623. "umull r1, r14, r3, r5 \n\t" \
  1624. "adds r8, r8, r1 \n\t" \
  1625. "adcs r9, r9, r14 \n\t" \
  1626. "adc r10, r10, #0 \n\t" \
  1627. "adds r8, r8, r8 \n\t" \
  1628. "adcs r9, r9, r9 \n\t" \
  1629. "adc r10, r10, r10 \n\t" \
  1630. "umull r1, r14, r4, r4 \n\t" \
  1631. "adds r8, r8, r1 \n\t" \
  1632. "adcs r9, r9, r14 \n\t" \
  1633. "adc r10, r10, #0 \n\t" \
  1634. "adds r8, r8, r11 \n\t" \
  1635. "adcs r9, r9, r12 \n\t" \
  1636. "adc r10, r10, #0 \n\t" \
  1637. "stmia r0!, {r8} \n\t" \
  1638. \
  /* out[5] = 2*(a0*a5 + a1*a4 + a2*a3) */ \
  1639. "mov r12, #0 \n\t" \
  1640. "umull r8, r11, r2, r7 \n\t" \
  1641. "umull r1, r14, r3, r6 \n\t" \
  1642. "adds r8, r8, r1 \n\t" \
  1643. "adcs r11, r11, r14 \n\t" \
  1644. "adc r12, r12, #0 \n\t" \
  1645. "umull r1, r14, r4, r5 \n\t" \
  1646. "adds r8, r8, r1 \n\t" \
  1647. "adcs r11, r11, r14 \n\t" \
  1648. "adc r12, r12, #0 \n\t" \
  1649. "adds r8, r8, r8 \n\t" \
  1650. "adcs r11, r11, r11 \n\t" \
  1651. "adc r12, r12, r12 \n\t" \
  1652. "adds r8, r8, r9 \n\t" \
  1653. "adcs r11, r11, r10 \n\t" \
  1654. "adc r12, r12, #0 \n\t" \
  1655. "stmia r0!, {r8} \n\t" \
  1656. \
  /* out[6] = 2*(a1*a5 + a2*a4) + a3*a3 */ \
  1657. "mov r10, #0 \n\t" \
  1658. "umull r8, r9, r3, r7 \n\t" \
  1659. "umull r1, r14, r4, r6 \n\t" \
  1660. "adds r8, r8, r1 \n\t" \
  1661. "adcs r9, r9, r14 \n\t" \
  1662. "adc r10, r10, #0 \n\t" \
  1663. "adds r8, r8, r8 \n\t" \
  1664. "adcs r9, r9, r9 \n\t" \
  1665. "adc r10, r10, r10 \n\t" \
  1666. "umull r1, r14, r5, r5 \n\t" \
  1667. "adds r8, r8, r1 \n\t" \
  1668. "adcs r9, r9, r14 \n\t" \
  1669. "adc r10, r10, #0 \n\t" \
  1670. "adds r8, r8, r11 \n\t" \
  1671. "adcs r9, r9, r12 \n\t" \
  1672. "adc r10, r10, #0 \n\t" \
  1673. "stmia r0!, {r8} \n\t" \
  1674. \
  /* out[7] = 2*(a2*a5 + a3*a4) */ \
  1675. "mov r12, #0 \n\t" \
  1676. "umull r8, r11, r4, r7 \n\t" \
  1677. "umull r1, r14, r5, r6 \n\t" \
  1678. "adds r8, r8, r1 \n\t" \
  1679. "adcs r11, r11, r14 \n\t" \
  1680. "adc r12, r12, #0 \n\t" \
  1681. "adds r8, r8, r8 \n\t" \
  1682. "adcs r11, r11, r11 \n\t" \
  1683. "adc r12, r12, r12 \n\t" \
  1684. "adds r8, r8, r9 \n\t" \
  1685. "adcs r11, r11, r10 \n\t" \
  1686. "adc r12, r12, #0 \n\t" \
  1687. "stmia r0!, {r8} \n\t" \
  1688. \
  /* out[8] = 2*a3*a5 + a4*a4 */ \
  1689. "mov r8, #0 \n\t" \
  1690. "umull r1, r10, r5, r7 \n\t" \
  1691. "adds r1, r1, r1 \n\t" \
  1692. "adcs r10, r10, r10 \n\t" \
  1693. "adc r8, r8, #0 \n\t" \
  1694. "adds r11, r11, r1 \n\t" \
  1695. "adcs r12, r12, r10 \n\t" \
  1696. "adc r8, r8, #0 \n\t" \
  1697. "umull r1, r10, r6, r6 \n\t" \
  1698. "adds r11, r11, r1 \n\t" \
  1699. "adcs r12, r12, r10 \n\t" \
  1700. "adc r8, r8, #0 \n\t" \
  1701. "stmia r0!, {r11} \n\t" \
  1702. \
  /* out[9] = 2*a4*a5 */ \
  1703. "mov r11, #0 \n\t" \
  1704. "umull r1, r10, r6, r7 \n\t" \
  1705. "adds r1, r1, r1 \n\t" \
  1706. "adcs r10, r10, r10 \n\t" \
  1707. "adc r11, r11, #0 \n\t" \
  1708. "adds r12, r12, r1 \n\t" \
  1709. "adcs r8, r8, r10 \n\t" \
  1710. "adc r11, r11, #0 \n\t" \
  1711. "stmia r0!, {r12} \n\t" \
  1712. \
  /* out[10..11] = a5*a5 plus the carried-in words; then restore r1 and r2 */ \
  1713. "umull r1, r10, r7, r7 \n\t" \
  1714. "adds r8, r8, r1 \n\t" \
  1715. "adcs r11, r11, r10 \n\t" \
  1716. "stmia r0!, {r8, r11} \n\t" \
  1717. "pop {r1, r2} \n\t"
  /* FAST_SQUARE_ASM_6_TO_7: extends an existing 6-word square to a 7-word square.
   * If r2 == 6 it branches forward to local label 1, which is defined outside
   * this macro. Otherwise it steps r0 and r1 back by 24 bytes, loads a[0..6]
   * through r1, and adds 2*a6*(a0..a5) + a6*a6 (shifted up by six words) into
   * the result, reading six existing words through r0 and storing eight.
   * Net pointer movement on the non-branch path: r0 +8 bytes, r1 +4 bytes.
   * r3-r12, r14 and the flags are overwritten.
   * NOTE(review): presumably r2 holds the operand word count and r0/r1 are as
   * FAST_SQUARE_ASM_6 leaves them -- confirm against the enclosing function. */
  1718. #define FAST_SQUARE_ASM_6_TO_7 \
  1719. "cmp r2, #6 \n\t" \
  1720. "beq 1f \n\t" \
  1721. \
  1722. "sub r0, #24 \n\t" \
  1723. "sub r1, #24 \n\t" \
  1724. \
  1725. /* Do off-center multiplication */ \
  /* r6..r11 = a0..a5, r12 = a6; r3..r9 becomes the 7-word product a6*(a0..a5). */ \
  /* umull leaves the flags alone, so the adds/adcs carry chain survives across it. */ \
  1726. "ldmia r1!, {r6,r7,r8,r9,r10,r11,r12} \n\t" \
  1727. "umull r3, r4, r6, r12 \n\t" \
  1728. "umull r6, r5, r7, r12 \n\t" \
  1729. "adds r4, r4, r6 \n\t" \
  1730. "umull r7, r6, r8, r12 \n\t" \
  1731. "adcs r5, r5, r7 \n\t" \
  1732. "umull r8, r7, r9, r12 \n\t" \
  1733. "adcs r6, r6, r8 \n\t" \
  1734. "umull r9, r8, r10, r12 \n\t" \
  1735. "adcs r7, r7, r9 \n\t" \
  1736. "umull r10, r9, r11, r12 \n\t" \
  1737. "adcs r8, r8, r10 \n\t" \
  1738. "adcs r9, r9, #0 \n\t" \
  1739. \
  1740. /* Multiply by 2 */ \
  /* r10 receives the bit shifted out of the top word */ \
  1741. "mov r10, #0 \n\t" \
  1742. "adds r3, r3, r3 \n\t" \
  1743. "adcs r4, r4, r4 \n\t" \
  1744. "adcs r5, r5, r5 \n\t" \
  1745. "adcs r6, r6, r6 \n\t" \
  1746. "adcs r7, r7, r7 \n\t" \
  1747. "adcs r8, r8, r8 \n\t" \
  1748. "adcs r9, r9, r9 \n\t" \
  1749. "adcs r10, r10, #0 \n\t" \
  1750. \
  1751. /* Add into previous */ \
  /* six existing result words are read through r0; the carry ripples into r9, r10 */ \
  1752. "ldr r14, [r0], #4 \n\t" \
  1753. "adds r3, r3, r14 \n\t" \
  1754. "ldr r14, [r0], #4 \n\t" \
  1755. "adcs r4, r4, r14 \n\t" \
  1756. "ldr r14, [r0], #4 \n\t" \
  1757. "adcs r5, r5, r14 \n\t" \
  1758. "ldr r14, [r0], #4 \n\t" \
  1759. "adcs r6, r6, r14 \n\t" \
  1760. "ldr r14, [r0], #4 \n\t" \
  1761. "adcs r7, r7, r14 \n\t" \
  1762. "ldr r14, [r0], #4 \n\t" \
  1763. "adcs r8, r8, r14 \n\t" \
  1764. "adcs r9, r9, #0 \n\t" \
  1765. "adcs r10, r10, #0 \n\t" \
  1766. "sub r0, #24 \n\t" \
  1767. \
  1768. /* Perform center multiplication */ \
  /* r10:r9 += a6*a6, then eight words are written back through r0 */ \
  1769. "umlal r9, r10, r12, r12 \n\t" \
  1770. "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10} \n\t"
  /* FAST_SQUARE_ASM_7: straight-line square of a 7-word value (32-bit words).
   * a[0..6] are loaded through r1 into r2..r8 and the 14 result words
   * out[0..13] are written through r0. Each cross product a[i]*a[j] (i != j)
   * is computed once and doubled; the squares a[i]*a[i] are added undoubled.
   * r8 is needed as an accumulator from out[1] on, so a0*a6 is computed first
   * and parked in out[6..7]; a6 is later reloaded into r2 once every product
   * involving a0 has been consumed, and the parked words are folded back in.
   * Carry detection after umlal: the old high word is copied to r14 and
   * compared with the new one; if the old value is higher the 64-bit
   * accumulate wrapped, and adchi (cmp has set C in that case) adds 1 to the
   * third accumulator word. The "it hi" line predicates that adchi.
   * r2 and then r1 (already advanced by 28 bytes) are pushed; the final pop
   * restores them. On exit r0 has advanced by 56 bytes and r1 by 28 bytes;
   * r3-r12, r14 and the flags are overwritten.
   * NOTE(review): the enclosing asm statement's clobber list is not visible
   * here -- confirm it covers these registers. */
  1771. #define FAST_SQUARE_ASM_7 \
  1772. "push {r2} \n\t" \
  1773. "ldmia r1!, {r2, r3, r4, r5, r6, r7, r8} \n\t" \
  1774. "push {r1} \n\t" \
  /* r1 now points at a6 so it can be reloaded later */ \
  1775. "sub r1, 4 \n\t" \
  1776. \
  /* park a0*a6 in out[6..7], then rewind r0 to out[0] */ \
  1777. "add r0, 24 \n\t" \
  1778. "umull r9, r10, r2, r8 \n\t" \
  1779. "stmia r0!, {r9, r10} \n\t" \
  1780. "sub r0, 32 \n\t" \
  1781. \
  /* out[0] = low(a0*a0), high word stays in r12 */ \
  1782. "umull r11, r12, r2, r2 \n\t" \
  1783. "stmia r0!, {r11} \n\t" \
  1784. \
  /* out[1] = 2*a0*a1 (product added twice); r8 stops holding a6 here */ \
  1785. "mov r9, #0 \n\t" \
  1786. "umull r10, r11, r2, r3 \n\t" \
  1787. "adds r12, r12, r10 \n\t" \
  1788. "adcs r8, r11, #0 \n\t" \
  1789. "adc r9, r9, #0 \n\t" \
  1790. "adds r12, r12, r10 \n\t" \
  1791. "adcs r8, r8, r11 \n\t" \
  1792. "adc r9, r9, #0 \n\t" \
  1793. "stmia r0!, {r12} \n\t" \
  1794. \
  /* out[2] = 2*a0*a2 + a1*a1 */ \
  1795. "mov r10, #0 \n\t" \
  1796. "umull r11, r12, r2, r4 \n\t" \
  1797. "adds r11, r11, r11 \n\t" \
  1798. "adcs r12, r12, r12 \n\t" \
  1799. "adc r10, r10, #0 \n\t" \
  1800. "adds r8, r8, r11 \n\t" \
  1801. "adcs r9, r9, r12 \n\t" \
  1802. "adc r10, r10, #0 \n\t" \
  1803. "umull r11, r12, r3, r3 \n\t" \
  1804. "adds r8, r8, r11 \n\t" \
  1805. "adcs r9, r9, r12 \n\t" \
  1806. "adc r10, r10, #0 \n\t" \
  1807. "stmia r0!, {r8} \n\t" \
  1808. \
  /* out[3] = 2*(a0*a3 + a1*a2) */ \
  1809. "mov r12, #0 \n\t" \
  1810. "umull r8, r11, r2, r5 \n\t" \
  1811. "mov r14, r11 \n\t" \
  1812. "umlal r8, r11, r3, r4 \n\t" \
  1813. "cmp r14, r11 \n\t" \
  1814. "it hi \n\t" \
  1815. "adchi r12, r12, #0 \n\t" \
  1816. "adds r8, r8, r8 \n\t" \
  1817. "adcs r11, r11, r11 \n\t" \
  1818. "adc r12, r12, r12 \n\t" \
  1819. "adds r8, r8, r9 \n\t" \
  1820. "adcs r11, r11, r10 \n\t" \
  1821. "adc r12, r12, #0 \n\t" \
  1822. "stmia r0!, {r8} \n\t" \
  1823. \
  /* out[4] = 2*(a0*a4 + a1*a3) + a2*a2 */ \
  1824. "mov r10, #0 \n\t" \
  1825. "umull r8, r9, r2, r6 \n\t" \
  1826. "mov r14, r9 \n\t" \
  1827. "umlal r8, r9, r3, r5 \n\t" \
  1828. "cmp r14, r9 \n\t" \
  1829. "it hi \n\t" \
  1830. "adchi r10, r10, #0 \n\t" \
  1831. "adds r8, r8, r8 \n\t" \
  1832. "adcs r9, r9, r9 \n\t" \
  1833. "adc r10, r10, r10 \n\t" \
  1834. "mov r14, r9 \n\t" \
  1835. "umlal r8, r9, r4, r4 \n\t" \
  1836. "cmp r14, r9 \n\t" \
  1837. "it hi \n\t" \
  1838. "adchi r10, r10, #0 \n\t" \
  1839. "adds r8, r8, r11 \n\t" \
  1840. "adcs r9, r9, r12 \n\t" \
  1841. "adc r10, r10, #0 \n\t" \
  1842. "stmia r0!, {r8} \n\t" \
  1843. \
  /* out[5] = 2*(a0*a5 + a1*a4 + a2*a3) */ \
  1844. "mov r12, #0 \n\t" \
  1845. "umull r8, r11, r2, r7 \n\t" \
  1846. "mov r14, r11 \n\t" \
  1847. "umlal r8, r11, r3, r6 \n\t" \
  1848. "cmp r14, r11 \n\t" \
  1849. "it hi \n\t" \
  1850. "adchi r12, r12, #0 \n\t" \
  1851. "mov r14, r11 \n\t" \
  1852. "umlal r8, r11, r4, r5 \n\t" \
  1853. "cmp r14, r11 \n\t" \
  1854. "it hi \n\t" \
  1855. "adchi r12, r12, #0 \n\t" \
  1856. "adds r8, r8, r8 \n\t" \
  1857. "adcs r11, r11, r11 \n\t" \
  1858. "adc r12, r12, r12 \n\t" \
  1859. "adds r8, r8, r9 \n\t" \
  1860. "adcs r11, r11, r10 \n\t" \
  1861. "adc r12, r12, #0 \n\t" \
  1862. "stmia r0!, {r8} \n\t" \
  1863. \
  /* r2 = a6 from here on. out[6] = 2*(a1*a5 + a2*a4 + parked low(a0*a6)) + a3*a3 */ \
  1864. "ldmia r1!, {r2} \n\t" \
  1865. "mov r10, #0 \n\t" \
  1866. "umull r8, r9, r3, r7 \n\t" \
  1867. "mov r14, r9 \n\t" \
  1868. "umlal r8, r9, r4, r6 \n\t" \
  1869. "cmp r14, r9 \n\t" \
  1870. "it hi \n\t" \
  1871. "adchi r10, r10, #0 \n\t" \
  1872. "ldr r14, [r0] \n\t" \
  1873. "adds r8, r8, r14 \n\t" \
  1874. "adcs r9, r9, #0 \n\t" \
  1875. "adc r10, r10, #0 \n\t" \
  1876. "adds r8, r8, r8 \n\t" \
  1877. "adcs r9, r9, r9 \n\t" \
  1878. "adc r10, r10, r10 \n\t" \
  1879. "mov r14, r9 \n\t" \
  1880. "umlal r8, r9, r5, r5 \n\t" \
  1881. "cmp r14, r9 \n\t" \
  1882. "it hi \n\t" \
  1883. "adchi r10, r10, #0 \n\t" \
  1884. "adds r8, r8, r11 \n\t" \
  1885. "adcs r9, r9, r12 \n\t" \
  1886. "adc r10, r10, #0 \n\t" \
  1887. "stmia r0!, {r8} \n\t" \
  1888. \
  /* out[7] = 2*(a1*a6 + a2*a5 + a3*a4 + parked high(a0*a6)) */ \
  1889. "mov r12, #0 \n\t" \
  1890. "umull r8, r11, r3, r2 \n\t" \
  1891. "mov r14, r11 \n\t" \
  1892. "umlal r8, r11, r4, r7 \n\t" \
  1893. "cmp r14, r11 \n\t" \
  1894. "it hi \n\t" \
  1895. "adchi r12, r12, #0 \n\t" \
  1896. "mov r14, r11 \n\t" \
  1897. "umlal r8, r11, r5, r6 \n\t" \
  1898. "cmp r14, r11 \n\t" \
  1899. "it hi \n\t" \
  1900. "adchi r12, r12, #0 \n\t" \
  1901. "ldr r14, [r0] \n\t" \
  1902. "adds r8, r8, r14 \n\t" \
  1903. "adcs r11, r11, #0 \n\t" \
  1904. "adc r12, r12, #0 \n\t" \
  1905. "adds r8, r8, r8 \n\t" \
  1906. "adcs r11, r11, r11 \n\t" \
  1907. "adc r12, r12, r12 \n\t" \
  1908. "adds r8, r8, r9 \n\t" \
  1909. "adcs r11, r11, r10 \n\t" \
  1910. "adc r12, r12, #0 \n\t" \
  1911. "stmia r0!, {r8} \n\t" \
  1912. \
  /* out[8] = 2*(a2*a6 + a3*a5) + a4*a4 */ \
  1913. "mov r10, #0 \n\t" \
  1914. "umull r8, r9, r4, r2 \n\t" \
  1915. "mov r14, r9 \n\t" \
  1916. "umlal r8, r9, r5, r7 \n\t" \
  1917. "cmp r14, r9 \n\t" \
  1918. "it hi \n\t" \
  1919. "adchi r10, r10, #0 \n\t" \
  1920. "adds r8, r8, r8 \n\t" \
  1921. "adcs r9, r9, r9 \n\t" \
  1922. "adc r10, r10, r10 \n\t" \
  1923. "mov r14, r9 \n\t" \
  1924. "umlal r8, r9, r6, r6 \n\t" \
  1925. "cmp r14, r9 \n\t" \
  1926. "it hi \n\t" \
  1927. "adchi r10, r10, #0 \n\t" \
  1928. "adds r8, r8, r11 \n\t" \
  1929. "adcs r9, r9, r12 \n\t" \
  1930. "adc r10, r10, #0 \n\t" \
  1931. "stmia r0!, {r8} \n\t" \
  1932. \
  /* out[9] = 2*(a3*a6 + a4*a5) */ \
  1933. "mov r12, #0 \n\t" \
  1934. "umull r8, r11, r5, r2 \n\t" \
  1935. "mov r14, r11 \n\t" \
  1936. "umlal r8, r11, r6, r7 \n\t" \
  1937. "cmp r14, r11 \n\t" \
  1938. "it hi \n\t" \
  1939. "adchi r12, r12, #0 \n\t" \
  1940. "adds r8, r8, r8 \n\t" \
  1941. "adcs r11, r11, r11 \n\t" \
  1942. "adc r12, r12, r12 \n\t" \
  1943. "adds r8, r8, r9 \n\t" \
  1944. "adcs r11, r11, r10 \n\t" \
  1945. "adc r12, r12, #0 \n\t" \
  1946. "stmia r0!, {r8} \n\t" \
  1947. \
  /* out[10] = 2*a4*a6 + a5*a5; r1 is plain scratch from here on */ \
  1948. "mov r8, #0 \n\t" \
  1949. "umull r1, r10, r6, r2 \n\t" \
  1950. "adds r1, r1, r1 \n\t" \
  1951. "adcs r10, r10, r10 \n\t" \
  1952. "adc r8, r8, #0 \n\t" \
  1953. "adds r11, r11, r1 \n\t" \
  1954. "adcs r12, r12, r10 \n\t" \
  1955. "adc r8, r8, #0 \n\t" \
  1956. "umull r1, r10, r7, r7 \n\t" \
  1957. "adds r11, r11, r1 \n\t" \
  1958. "adcs r12, r12, r10 \n\t" \
  1959. "adc r8, r8, #0 \n\t" \
  1960. "stmia r0!, {r11} \n\t" \
  1961. \
  /* out[11] = 2*a5*a6 */ \
  1962. "mov r11, #0 \n\t" \
  1963. "umull r1, r10, r7, r2 \n\t" \
  1964. "adds r1, r1, r1 \n\t" \
  1965. "adcs r10, r10, r10 \n\t" \
  1966. "adc r11, r11, #0 \n\t" \
  1967. "adds r12, r12, r1 \n\t" \
  1968. "adcs r8, r8, r10 \n\t" \
  1969. "adc r11, r11, #0 \n\t" \
  1970. "stmia r0!, {r12} \n\t" \
  1971. \
  /* out[12..13] = a6*a6 plus the carried-in words; then restore r1 and r2 */ \
  1972. "umull r1, r10, r2, r2 \n\t" \
  1973. "adds r8, r8, r1 \n\t" \
  1974. "adcs r11, r11, r10 \n\t" \
  1975. "stmia r0!, {r8, r11} \n\t" \
  1976. "pop {r1, r2} \n\t"
  /* FAST_SQUARE_ASM_7_TO_8: extends an existing 7-word square to an 8-word square.
   * If r2 == 7 it branches forward to local label 1, which is defined outside
   * this macro. Otherwise it steps r0 and r1 back by 28 bytes, loads a[0..7]
   * through r1, and adds 2*a7*(a0..a6) + a7*a7 (shifted up by seven words) into
   * the result, reading seven existing words through r0 and storing nine.
   * Net pointer movement on the non-branch path: r0 +8 bytes, r1 +4 bytes.
   * r3-r12, r14 and the flags are overwritten.
   * NOTE(review): presumably r2 holds the operand word count and r0/r1 are as
   * FAST_SQUARE_ASM_7 leaves them -- confirm against the enclosing function. */
  1977. #define FAST_SQUARE_ASM_7_TO_8 \
  1978. "cmp r2, #7 \n\t" \
  1979. "beq 1f \n\t" \
  1980. \
  1981. "sub r0, #28 \n\t" \
  1982. "sub r1, #28 \n\t" \
  1983. \
  1984. /* Do off-center multiplication */ \
  /* r6..r12 = a0..a6, r14 = a7; r3..r10 becomes the 8-word product a7*(a0..a6). */ \
  /* umull leaves the flags alone, so the adds/adcs carry chain survives across it. */ \
  1985. "ldmia r1!, {r6,r7,r8,r9,r10,r11,r12,r14} \n\t" \
  1986. "umull r3, r4, r6, r14 \n\t" \
  1987. "umull r6, r5, r7, r14 \n\t" \
  1988. "adds r4, r4, r6 \n\t" \
  1989. "umull r7, r6, r8, r14 \n\t" \
  1990. "adcs r5, r5, r7 \n\t" \
  1991. "umull r8, r7, r9, r14 \n\t" \
  1992. "adcs r6, r6, r8 \n\t" \
  1993. "umull r9, r8, r10, r14 \n\t" \
  1994. "adcs r7, r7, r9 \n\t" \
  1995. "umull r10, r9, r11, r14 \n\t" \
  1996. "adcs r8, r8, r10 \n\t" \
  1997. "umull r11, r10, r12, r14 \n\t" \
  1998. "adcs r9, r9, r11 \n\t" \
  1999. "adcs r10, r10, #0 \n\t" \
  2000. \
  2001. /* Multiply by 2 */ \
  /* r11 receives the bit shifted out of the top word */ \
  2002. "mov r11, #0 \n\t" \
  2003. "adds r3, r3, r3 \n\t" \
  2004. "adcs r4, r4, r4 \n\t" \
  2005. "adcs r5, r5, r5 \n\t" \
  2006. "adcs r6, r6, r6 \n\t" \
  2007. "adcs r7, r7, r7 \n\t" \
  2008. "adcs r8, r8, r8 \n\t" \
  2009. "adcs r9, r9, r9 \n\t" \
  2010. "adcs r10, r10, r10 \n\t" \
  2011. "adcs r11, r11, #0 \n\t" \
  2012. \
  2013. /* Add into previous */ \
  /* r12 (a6 is no longer needed) is the load scratch because r14 still holds a7; */ \
  /* seven existing result words are read through r0, the carry ripples into r10, r11 */ \
  2014. "ldr r12, [r0], #4 \n\t" \
  2015. "adds r3, r3, r12 \n\t" \
  2016. "ldr r12, [r0], #4 \n\t" \
  2017. "adcs r4, r4, r12 \n\t" \
  2018. "ldr r12, [r0], #4 \n\t" \
  2019. "adcs r5, r5, r12 \n\t" \
  2020. "ldr r12, [r0], #4 \n\t" \
  2021. "adcs r6, r6, r12 \n\t" \
  2022. "ldr r12, [r0], #4 \n\t" \
  2023. "adcs r7, r7, r12 \n\t" \
  2024. "ldr r12, [r0], #4 \n\t" \
  2025. "adcs r8, r8, r12 \n\t" \
  2026. "ldr r12, [r0], #4 \n\t" \
  2027. "adcs r9, r9, r12 \n\t" \
  2028. "adcs r10, r10, #0 \n\t" \
  2029. "adcs r11, r11, #0 \n\t" \
  2030. "sub r0, #28 \n\t" \
  2031. \
  2032. /* Perform center multiplication */ \
  /* r11:r10 += a7*a7, then nine words are written back through r0 */ \
  2033. "umlal r10, r11, r14, r14 \n\t" \
  2034. "stmia r0!, {r3,r4,r5,r6,r7,r8,r9,r10,r11} \n\t"
  2035. #define FAST_SQUARE_ASM_8 /* Asm fragment: squares the 8 words a0..a7 at [r1] and stores the 16-word result at [r0], one result word (column) at a time. Overwrites r3-r12, r14 (lr) and the flags; on exit r0 is 64 bytes on, r1 is 32 bytes on, r2 has its entry value. */ \
  2036. "push {r2} \n\t" /* r2 is used as a data register below; restored by the final pop */ \
  2037. "ldmia r1!, {r2,r3,r4,r5,r6,r7,r8,r9} \n\t" /* a0..a7 -> r2..r9; r1 += 32 */ \
  2038. "push {r1} \n\t" /* save the advanced r1; it is reused as a pointer and later as scratch */ \
  2039. "sub r1, 8 \n\t" /* r1 -> a6, for the reloads of a6 and a7 further down */ \
  2040. /* Precompute a0*a6, a0*a7 and a1*a7 and park their sum in result words 6..9, because r8/r9 (a6/a7) are recycled as accumulators from here on. Columns 6..9 pick these words up again with ldr r14, [r0] before doubling. */ \
  2041. "add r0, 24 \n\t" /* r0 -> result word 6 */ \
  2042. "umull r10, r11, r2, r8 \n\t" /* r11:r10 = a0 * a6 */ \
  2043. "umull r12, r14, r2, r9 \n\t" /* r14:r12 = a0 * a7 */ \
  2044. "umull r8, r9, r3, r9 \n\t" /* r9:r8 = a1 * a7; overwrites a6/a7 */ \
  2045. "adds r11, r11, r12 \n\t" \
  2046. "adcs r12, r14, r8 \n\t" \
  2047. "adcs r14, r9, #0 \n\t" \
  2048. "stmia r0!, {r10, r11, r12, r14} \n\t" /* result words 6..9 = partial sums (not yet doubled) */ \
  2049. "sub r0, 40 \n\t" /* back to result word 0 */ \
  2050. /* Column 0: a0^2. The low word is stored; the high word carries into column 1 in r12. */ \
  2051. "umull r11, r12, r2, r2 \n\t" \
  2052. "stmia r0!, {r11} \n\t" \
  2053. /* Column 1: 2*a0*a1, formed by adding the product twice; the carry into columns 2 and 3 collects in r8 and r9. */ \
  2054. "mov r9, #0 \n\t" \
  2055. "umull r10, r11, r2, r3 \n\t" \
  2056. "adds r12, r12, r10 \n\t" \
  2057. "adcs r8, r11, #0 \n\t" \
  2058. "adc r9, r9, #0 \n\t" \
  2059. "adds r12, r12, r10 \n\t" \
  2060. "adcs r8, r8, r11 \n\t" \
  2061. "adc r9, r9, #0 \n\t" \
  2062. "stmia r0!, {r12} \n\t" \
  2063. /* Column 2: 2*a0*a2 + a1^2, added onto the carry in r9:r8; r10 takes the overflow. */ \
  2064. "mov r10, #0 \n\t" \
  2065. "umull r11, r12, r2, r4 \n\t" \
  2066. "adds r11, r11, r11 \n\t" \
  2067. "adcs r12, r12, r12 \n\t" \
  2068. "adc r10, r10, #0 \n\t" \
  2069. "adds r8, r8, r11 \n\t" \
  2070. "adcs r9, r9, r12 \n\t" \
  2071. "adc r10, r10, #0 \n\t" \
  2072. "umull r11, r12, r3, r3 \n\t" \
  2073. "adds r8, r8, r11 \n\t" \
  2074. "adcs r9, r9, r12 \n\t" \
  2075. "adc r10, r10, #0 \n\t" \
  2076. "stmia r0!, {r8} \n\t" \
  2077. /* Column 3: 2*(a0*a3 + a1*a2) plus the carry in r10:r9. Idiom used from here on: after each umlal, the old high word (copied to r14) is compared with the new one; it is higher only if the 64-bit accumulate wrapped, and adchi then adds the set carry flag to the third accumulator word. */ \
  2078. "mov r12, #0 \n\t" \
  2079. "umull r8, r11, r2, r5 \n\t" \
  2080. "mov r14, r11 \n\t" \
  2081. "umlal r8, r11, r3, r4 \n\t" \
  2082. "cmp r14, r11 \n\t" \
  2083. "it hi \n\t" \
  2084. "adchi r12, r12, #0 \n\t" \
  2085. "adds r8, r8, r8 \n\t" /* double the 3-word sum of cross products */ \
  2086. "adcs r11, r11, r11 \n\t" \
  2087. "adc r12, r12, r12 \n\t" \
  2088. "adds r8, r8, r9 \n\t" /* add the carry from the previous column */ \
  2089. "adcs r11, r11, r10 \n\t" \
  2090. "adc r12, r12, #0 \n\t" \
  2091. "stmia r0!, {r8} \n\t" \
  2092. /* Column 4: 2*(a0*a4 + a1*a3) + a2^2 plus the carry in r12:r11. */ \
  2093. "mov r10, #0 \n\t" \
  2094. "umull r8, r9, r2, r6 \n\t" \
  2095. "mov r14, r9 \n\t" \
  2096. "umlal r8, r9, r3, r5 \n\t" \
  2097. "cmp r14, r9 \n\t" \
  2098. "it hi \n\t" \
  2099. "adchi r10, r10, #0 \n\t" \
  2100. "adds r8, r8, r8 \n\t" \
  2101. "adcs r9, r9, r9 \n\t" \
  2102. "adc r10, r10, r10 \n\t" \
  2103. "mov r14, r9 \n\t" \
  2104. "umlal r8, r9, r4, r4 \n\t" /* the square term is added after doubling, so it counts once */ \
  2105. "cmp r14, r9 \n\t" \
  2106. "it hi \n\t" \
  2107. "adchi r10, r10, #0 \n\t" \
  2108. "adds r8, r8, r11 \n\t" \
  2109. "adcs r9, r9, r12 \n\t" \
  2110. "adc r10, r10, #0 \n\t" \
  2111. "stmia r0!, {r8} \n\t" \
  2112. /* Column 5: 2*(a0*a5 + a1*a4 + a2*a3) plus the carry in r10:r9. */ \
  2113. "mov r12, #0 \n\t" \
  2114. "umull r8, r11, r2, r7 \n\t" \
  2115. "mov r14, r11 \n\t" \
  2116. "umlal r8, r11, r3, r6 \n\t" \
  2117. "cmp r14, r11 \n\t" \
  2118. "it hi \n\t" \
  2119. "adchi r12, r12, #0 \n\t" \
  2120. "mov r14, r11 \n\t" \
  2121. "umlal r8, r11, r4, r5 \n\t" \
  2122. "cmp r14, r11 \n\t" \
  2123. "it hi \n\t" \
  2124. "adchi r12, r12, #0 \n\t" \
  2125. "adds r8, r8, r8 \n\t" \
  2126. "adcs r11, r11, r11 \n\t" \
  2127. "adc r12, r12, r12 \n\t" \
  2128. "adds r8, r8, r9 \n\t" \
  2129. "adcs r11, r11, r10 \n\t" \
  2130. "adc r12, r12, #0 \n\t" \
  2131. "stmia r0!, {r8} \n\t" \
  2132. /* Column 6: (a1*a5 + a2*a4 + the precomputed word at [r0], which holds the a0*a6 part) doubled, plus a3^2 and the carry in r12:r11. */ \
  2133. "ldmia r1!, {r2} \n\t" /* r2 = a6; a0 is not needed any more */ \
  2134. "mov r10, #0 \n\t" \
  2135. "umull r8, r9, r3, r7 \n\t" \
  2136. "mov r14, r9 \n\t" \
  2137. "umlal r8, r9, r4, r6 \n\t" \
  2138. "cmp r14, r9 \n\t" \
  2139. "it hi \n\t" \
  2140. "adchi r10, r10, #0 \n\t" \
  2141. "ldr r14, [r0] \n\t" /* precomputed partial sum for this column */ \
  2142. "adds r8, r8, r14 \n\t" \
  2143. "adcs r9, r9, #0 \n\t" \
  2144. "adc r10, r10, #0 \n\t" \
  2145. "adds r8, r8, r8 \n\t" \
  2146. "adcs r9, r9, r9 \n\t" \
  2147. "adc r10, r10, r10 \n\t" \
  2148. "mov r14, r9 \n\t" \
  2149. "umlal r8, r9, r5, r5 \n\t" \
  2150. "cmp r14, r9 \n\t" \
  2151. "it hi \n\t" \
  2152. "adchi r10, r10, #0 \n\t" \
  2153. "adds r8, r8, r11 \n\t" \
  2154. "adcs r9, r9, r12 \n\t" \
  2155. "adc r10, r10, #0 \n\t" \
  2156. "stmia r0!, {r8} \n\t" /* overwrites the precomputed word with the final column value */ \
  2157. /* Column 7: (a1*a6 + a2*a5 + a3*a4 + the precomputed word at [r0], which holds the a0*a7 part) doubled, plus the carry in r10:r9. */ \
  2158. "mov r12, #0 \n\t" \
  2159. "umull r8, r11, r3, r2 \n\t" \
  2160. "mov r14, r11 \n\t" \
  2161. "umlal r8, r11, r4, r7 \n\t" \
  2162. "cmp r14, r11 \n\t" \
  2163. "it hi \n\t" \
  2164. "adchi r12, r12, #0 \n\t" \
  2165. "mov r14, r11 \n\t" \
  2166. "umlal r8, r11, r5, r6 \n\t" \
  2167. "cmp r14, r11 \n\t" \
  2168. "it hi \n\t" \
  2169. "adchi r12, r12, #0 \n\t" \
  2170. "ldr r14, [r0] \n\t" /* precomputed partial sum for this column */ \
  2171. "adds r8, r8, r14 \n\t" \
  2172. "adcs r11, r11, #0 \n\t" \
  2173. "adc r12, r12, #0 \n\t" \
  2174. "adds r8, r8, r8 \n\t" \
  2175. "adcs r11, r11, r11 \n\t" \
  2176. "adc r12, r12, r12 \n\t" \
  2177. "adds r8, r8, r9 \n\t" \
  2178. "adcs r11, r11, r10 \n\t" \
  2179. "adc r12, r12, #0 \n\t" \
  2180. "stmia r0!, {r8} \n\t" \
  2181. /* Column 8: (a2*a6 + a3*a5 + the precomputed word at [r0], which holds the a1*a7 part) doubled, plus a4^2 and the carry in r12:r11. */ \
  2182. "ldmia r1!, {r3} \n\t" /* r3 = a7; a1 is not needed any more */ \
  2183. "mov r10, #0 \n\t" \
  2184. "umull r8, r9, r4, r2 \n\t" \
  2185. "mov r14, r9 \n\t" \
  2186. "umlal r8, r9, r5, r7 \n\t" \
  2187. "cmp r14, r9 \n\t" \
  2188. "it hi \n\t" \
  2189. "adchi r10, r10, #0 \n\t" \
  2190. "ldr r14, [r0] \n\t" /* precomputed partial sum for this column */ \
  2191. "adds r8, r8, r14 \n\t" \
  2192. "adcs r9, r9, #0 \n\t" \
  2193. "adc r10, r10, #0 \n\t" \
  2194. "adds r8, r8, r8 \n\t" \
  2195. "adcs r9, r9, r9 \n\t" \
  2196. "adc r10, r10, r10 \n\t" \
  2197. "mov r14, r9 \n\t" \
  2198. "umlal r8, r9, r6, r6 \n\t" \
  2199. "cmp r14, r9 \n\t" \
  2200. "it hi \n\t" \
  2201. "adchi r10, r10, #0 \n\t" \
  2202. "adds r8, r8, r11 \n\t" \
  2203. "adcs r9, r9, r12 \n\t" \
  2204. "adc r10, r10, #0 \n\t" \
  2205. "stmia r0!, {r8} \n\t" \
  2206. /* Column 9: (a2*a7 + a3*a6 + a4*a5 + the last precomputed word at [r0]) doubled, plus the carry in r10:r9. */ \
  2207. "mov r12, #0 \n\t" \
  2208. "umull r8, r11, r4, r3 \n\t" \
  2209. "mov r14, r11 \n\t" \
  2210. "umlal r8, r11, r5, r2 \n\t" \
  2211. "cmp r14, r11 \n\t" \
  2212. "it hi \n\t" \
  2213. "adchi r12, r12, #0 \n\t" \
  2214. "mov r14, r11 \n\t" \
  2215. "umlal r8, r11, r6, r7 \n\t" \
  2216. "cmp r14, r11 \n\t" \
  2217. "it hi \n\t" \
  2218. "adchi r12, r12, #0 \n\t" \
  2219. "ldr r14, [r0] \n\t" /* precomputed partial sum for this column */ \
  2220. "adds r8, r8, r14 \n\t" \
  2221. "adcs r11, r11, #0 \n\t" \
  2222. "adc r12, r12, #0 \n\t" \
  2223. "adds r8, r8, r8 \n\t" \
  2224. "adcs r11, r11, r11 \n\t" \
  2225. "adc r12, r12, r12 \n\t" \
  2226. "adds r8, r8, r9 \n\t" \
  2227. "adcs r11, r11, r10 \n\t" \
  2228. "adc r12, r12, #0 \n\t" \
  2229. "stmia r0!, {r8} \n\t" \
  2230. /* Column 10: 2*(a3*a7 + a4*a6) + a5^2 plus the carry in r12:r11. */ \
  2231. "mov r10, #0 \n\t" \
  2232. "umull r8, r9, r5, r3 \n\t" \
  2233. "mov r14, r9 \n\t" \
  2234. "umlal r8, r9, r6, r2 \n\t" \
  2235. "cmp r14, r9 \n\t" \
  2236. "it hi \n\t" \
  2237. "adchi r10, r10, #0 \n\t" \
  2238. "adds r8, r8, r8 \n\t" \
  2239. "adcs r9, r9, r9 \n\t" \
  2240. "adc r10, r10, r10 \n\t" \
  2241. "mov r14, r9 \n\t" \
  2242. "umlal r8, r9, r7, r7 \n\t" \
  2243. "cmp r14, r9 \n\t" \
  2244. "it hi \n\t" \
  2245. "adchi r10, r10, #0 \n\t" \
  2246. "adds r8, r8, r11 \n\t" \
  2247. "adcs r9, r9, r12 \n\t" \
  2248. "adc r10, r10, #0 \n\t" \
  2249. "stmia r0!, {r8} \n\t" \
  2250. /* Column 11: 2*(a4*a7 + a5*a6) plus the carry in r10:r9. */ \
  2251. "mov r12, #0 \n\t" \
  2252. "umull r8, r11, r6, r3 \n\t" \
  2253. "mov r14, r11 \n\t" \
  2254. "umlal r8, r11, r7, r2 \n\t" \
  2255. "cmp r14, r11 \n\t" \
  2256. "it hi \n\t" \
  2257. "adchi r12, r12, #0 \n\t" \
  2258. "adds r8, r8, r8 \n\t" \
  2259. "adcs r11, r11, r11 \n\t" \
  2260. "adc r12, r12, r12 \n\t" \
  2261. "adds r8, r8, r9 \n\t" \
  2262. "adcs r11, r11, r10 \n\t" \
  2263. "adc r12, r12, #0 \n\t" \
  2264. "stmia r0!, {r8} \n\t" \
  2265. /* Column 12: 2*a5*a7 + a6^2 added onto the carry in r12:r11. r1 is no longer needed as a pointer and serves as scratch from here on (the final pop reloads it). */ \
  2266. "mov r8, #0 \n\t" \
  2267. "umull r1, r10, r7, r3 \n\t" \
  2268. "adds r1, r1, r1 \n\t" \
  2269. "adcs r10, r10, r10 \n\t" \
  2270. "adc r8, r8, #0 \n\t" \
  2271. "adds r11, r11, r1 \n\t" \
  2272. "adcs r12, r12, r10 \n\t" \
  2273. "adc r8, r8, #0 \n\t" \
  2274. "umull r1, r10, r2, r2 \n\t" \
  2275. "adds r11, r11, r1 \n\t" \
  2276. "adcs r12, r12, r10 \n\t" \
  2277. "adc r8, r8, #0 \n\t" \
  2278. "stmia r0!, {r11} \n\t" \
  2279. /* Column 13: 2*a6*a7 added onto the carry in r8:r12. */ \
  2280. "mov r11, #0 \n\t" \
  2281. "umull r1, r10, r2, r3 \n\t" \
  2282. "adds r1, r1, r1 \n\t" \
  2283. "adcs r10, r10, r10 \n\t" \
  2284. "adc r11, r11, #0 \n\t" \
  2285. "adds r12, r12, r1 \n\t" \
  2286. "adcs r8, r8, r10 \n\t" \
  2287. "adc r11, r11, #0 \n\t" \
  2288. "stmia r0!, {r12} \n\t" \
  2289. /* Columns 14 and 15: a7^2 plus the remaining carry in r11:r8. */ \
  2290. "umull r1, r10, r3, r3 \n\t" \
  2291. "adds r8, r8, r1 \n\t" \
  2292. "adcs r11, r11, r10 \n\t" \
  2293. "stmia r0!, {r8, r11} \n\t" \
  2294. "pop {r1, r2} \n\t" /* r1 = value pushed after the initial load (entry r1 + 32); r2 = entry value */
  2295. #endif /* _UECC_ASM_ARM_MULT_SQUARE_H_ */