fe_low_mem.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /* fe_low_mem.c
  2. *
  3. * Copyright (C) 2006-2023 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /* Based from Daniel Beer's public domain work. */
  22. #ifdef HAVE_CONFIG_H
  23. #include <config.h>
  24. #endif
  25. #include <wolfssl/wolfcrypt/settings.h>
  26. #if defined(HAVE_CURVE25519) || defined(HAVE_ED25519)
  27. #if defined(CURVE25519_SMALL) || defined(ED25519_SMALL) /* use slower code that takes less memory */
  28. #include <wolfssl/wolfcrypt/fe_operations.h>
  29. #ifdef NO_INLINE
  30. #include <wolfssl/wolfcrypt/misc.h>
  31. #else
  32. #define WOLFSSL_MISC_INCLUDED
  33. #include <wolfcrypt/src/misc.c>
  34. #endif
  35. void fprime_copy(byte *x, const byte *a)
  36. {
  37. int i;
  38. for (i = 0; i < F25519_SIZE; i++)
  39. x[i] = a[i];
  40. }
  41. void lm_copy(byte* x, const byte* a)
  42. {
  43. int i;
  44. for (i = 0; i < F25519_SIZE; i++)
  45. x[i] = a[i];
  46. }
  47. #if ((defined(HAVE_CURVE25519) && !defined(CURVE25519_SMALL)) || \
  48. (defined(HAVE_ED25519) && !defined(ED25519_SMALL))) && \
  49. !defined(FREESCALE_LTC_ECC)
  50. /* to be Complementary to fe_low_mem.c */
  51. #else
  52. void fe_init(void)
  53. {
  54. }
  55. #endif
  56. #ifdef CURVE25519_SMALL
  57. /* Double an X-coordinate */
  58. static void xc_double(byte *x3, byte *z3,
  59. const byte *x1, const byte *z1)
  60. {
  61. /* Explicit formulas database: dbl-1987-m
  62. *
  63. * source 1987 Montgomery "Speeding the Pollard and elliptic
  64. * curve methods of factorization", page 261, fourth display
  65. * compute X3 = (X1^2-Z1^2)^2
  66. * compute Z3 = 4 X1 Z1 (X1^2 + a X1 Z1 + Z1^2)
  67. */
  68. byte x1sq[F25519_SIZE];
  69. byte z1sq[F25519_SIZE];
  70. byte x1z1[F25519_SIZE];
  71. byte a[F25519_SIZE];
  72. fe_mul__distinct(x1sq, x1, x1);
  73. fe_mul__distinct(z1sq, z1, z1);
  74. fe_mul__distinct(x1z1, x1, z1);
  75. lm_sub(a, x1sq, z1sq);
  76. fe_mul__distinct(x3, a, a);
  77. fe_mul_c(a, x1z1, 486662);
  78. lm_add(a, x1sq, a);
  79. lm_add(a, z1sq, a);
  80. fe_mul__distinct(x1sq, x1z1, a);
  81. fe_mul_c(z3, x1sq, 4);
  82. }
  83. /* Differential addition */
  84. static void xc_diffadd(byte *x5, byte *z5,
  85. const byte *x1, const byte *z1,
  86. const byte *x2, const byte *z2,
  87. const byte *x3, const byte *z3)
  88. {
  89. /* Explicit formulas database: dbl-1987-m3
  90. *
  91. * source 1987 Montgomery "Speeding the Pollard and elliptic curve
  92. * methods of factorization", page 261, fifth display, plus
  93. * common-subexpression elimination
  94. * compute A = X2+Z2
  95. * compute B = X2-Z2
  96. * compute C = X3+Z3
  97. * compute D = X3-Z3
  98. * compute DA = D A
  99. * compute CB = C B
  100. * compute X5 = Z1(DA+CB)^2
  101. * compute Z5 = X1(DA-CB)^2
  102. */
  103. byte da[F25519_SIZE];
  104. byte cb[F25519_SIZE];
  105. byte a[F25519_SIZE];
  106. byte b[F25519_SIZE];
  107. lm_add(a, x2, z2);
  108. lm_sub(b, x3, z3); /* D */
  109. fe_mul__distinct(da, a, b);
  110. lm_sub(b, x2, z2);
  111. lm_add(a, x3, z3); /* C */
  112. fe_mul__distinct(cb, a, b);
  113. lm_add(a, da, cb);
  114. fe_mul__distinct(b, a, a);
  115. fe_mul__distinct(x5, z1, b);
  116. lm_sub(a, da, cb);
  117. fe_mul__distinct(b, a, a);
  118. fe_mul__distinct(z5, x1, b);
  119. }
  120. #ifndef FREESCALE_LTC_ECC
  121. int curve25519(byte *result, const byte *n, const byte *p)
  122. {
  123. /* Current point: P_m */
  124. byte xm[F25519_SIZE];
  125. byte zm[F25519_SIZE] = {1};
  126. /* Predecessor: P_(m-1) */
  127. byte xm1[F25519_SIZE] = {1};
  128. byte zm1[F25519_SIZE] = {0};
  129. int i;
  130. /* Note: bit 254 is assumed to be 1 */
  131. lm_copy(xm, p);
  132. for (i = 253; i >= 0; i--) {
  133. const int bit = (n[i >> 3] >> (i & 7)) & 1;
  134. byte xms[F25519_SIZE];
  135. byte zms[F25519_SIZE];
  136. /* From P_m and P_(m-1), compute P_(2m) and P_(2m-1) */
  137. xc_diffadd(xm1, zm1, p, f25519_one, xm, zm, xm1, zm1);
  138. xc_double(xm, zm, xm, zm);
  139. /* Compute P_(2m+1) */
  140. xc_diffadd(xms, zms, xm1, zm1, xm, zm, p, f25519_one);
  141. /* Select:
  142. * bit = 1 --> (P_(2m+1), P_(2m))
  143. * bit = 0 --> (P_(2m), P_(2m-1))
  144. */
  145. fe_select(xm1, xm1, xm, bit);
  146. fe_select(zm1, zm1, zm, bit);
  147. fe_select(xm, xm, xms, bit);
  148. fe_select(zm, zm, zms, bit);
  149. }
  150. /* Freeze out of projective coordinates */
  151. fe_inv__distinct(zm1, zm);
  152. fe_mul__distinct(result, zm1, xm);
  153. fe_normalize(result);
  154. return 0;
  155. }
  156. #endif /* !FREESCALE_LTC_ECC */
  157. #endif /* CURVE25519_SMALL */
  158. static void raw_add(byte *x, const byte *p)
  159. {
  160. word16 c = 0;
  161. int i;
  162. for (i = 0; i < F25519_SIZE; i++) {
  163. c += ((word16)x[i]) + ((word16)p[i]);
  164. x[i] = (byte)c;
  165. c >>= 8;
  166. }
  167. }
  168. static void raw_try_sub(byte *x, const byte *p)
  169. {
  170. byte minusp[F25519_SIZE];
  171. word16 c = 0;
  172. int i;
  173. for (i = 0; i < F25519_SIZE; i++) {
  174. c = ((word16)x[i]) - ((word16)p[i]) - c;
  175. minusp[i] = (byte)c;
  176. c = (c >> 8) & 1;
  177. }
  178. fprime_select(x, minusp, x, (byte)c);
  179. }
  180. static int prime_msb(const byte *p)
  181. {
  182. int i;
  183. byte x;
  184. int shift = 1;
  185. int z = F25519_SIZE - 1;
  186. /*
  187. Test for any hot bits.
  188. As soon as one instance is encountered set shift to 0.
  189. */
  190. for (i = F25519_SIZE - 1; i >= 0; i--) {
  191. shift &= ((shift ^ ((-p[i] | p[i]) >> 7)) & 1);
  192. z -= shift;
  193. }
  194. x = p[z];
  195. z <<= 3;
  196. shift = 1;
  197. for (i = 0; i < 8; i++) {
  198. shift &= ((-(x >> i) | (x >> i)) >> (7 - i) & 1);
  199. z += shift;
  200. }
  201. return z - 1;
  202. }
  203. void fprime_select(byte *dst, const byte *zero, const byte *one, byte condition)
  204. {
  205. const byte mask = -condition;
  206. int i;
  207. for (i = 0; i < F25519_SIZE; i++)
  208. dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  209. }
  210. void fprime_add(byte *r, const byte *a, const byte *modulus)
  211. {
  212. raw_add(r, a);
  213. raw_try_sub(r, modulus);
  214. }
  215. void fprime_sub(byte *r, const byte *a, const byte *modulus)
  216. {
  217. raw_add(r, modulus);
  218. raw_try_sub(r, a);
  219. raw_try_sub(r, modulus);
  220. }
  221. void fprime_mul(byte *r, const byte *a, const byte *b,
  222. const byte *modulus)
  223. {
  224. word16 c = 0;
  225. int i,j;
  226. XMEMSET(r, 0, F25519_SIZE);
  227. for (i = prime_msb(modulus); i >= 0; i--) {
  228. const byte bit = (b[i >> 3] >> (i & 7)) & 1;
  229. byte plusa[F25519_SIZE];
  230. for (j = 0; j < F25519_SIZE; j++) {
  231. c |= ((word16)r[j]) << 1;
  232. r[j] = (byte)c;
  233. c >>= 8;
  234. }
  235. raw_try_sub(r, modulus);
  236. fprime_copy(plusa, r);
  237. fprime_add(plusa, a, modulus);
  238. fprime_select(r, r, plusa, bit);
  239. }
  240. }
  241. void fe_load(byte *x, word32 c)
  242. {
  243. word32 i;
  244. for (i = 0; i < sizeof(c); i++) {
  245. x[i] = c;
  246. c >>= 8;
  247. }
  248. for (; i < F25519_SIZE; i++)
  249. x[i] = 0;
  250. }
  251. void fe_normalize(byte *x)
  252. {
  253. byte minusp[F25519_SIZE];
  254. word16 c;
  255. int i;
  256. /* Reduce using 2^255 = 19 mod p */
  257. c = (x[31] >> 7) * 19;
  258. x[31] &= 127;
  259. for (i = 0; i < F25519_SIZE; i++) {
  260. c += x[i];
  261. x[i] = (byte)c;
  262. c >>= 8;
  263. }
  264. /* The number is now less than 2^255 + 18, and therefore less than
  265. * 2p. Try subtracting p, and conditionally load the subtracted
  266. * value if underflow did not occur.
  267. */
  268. c = 19;
  269. for (i = 0; i + 1 < F25519_SIZE; i++) {
  270. c += x[i];
  271. minusp[i] = (byte)c;
  272. c >>= 8;
  273. }
  274. c += ((word16)x[i]) - 128;
  275. minusp[31] = (byte)c;
  276. /* Load x-p if no underflow */
  277. fe_select(x, minusp, x, (c >> 15) & 1);
  278. }
  279. void fe_select(byte *dst,
  280. const byte *zero, const byte *one,
  281. byte condition)
  282. {
  283. const byte mask = -condition;
  284. int i;
  285. for (i = 0; i < F25519_SIZE; i++)
  286. dst[i] = zero[i] ^ (mask & (one[i] ^ zero[i]));
  287. }
  288. void lm_add(byte* r, const byte* a, const byte* b)
  289. {
  290. word16 c = 0;
  291. int i;
  292. /* Add */
  293. for (i = 0; i < F25519_SIZE; i++) {
  294. c >>= 8;
  295. c += ((word16)a[i]) + ((word16)b[i]);
  296. r[i] = (byte)c;
  297. }
  298. /* Reduce with 2^255 = 19 mod p */
  299. r[31] &= 127;
  300. c = (c >> 7) * 19;
  301. for (i = 0; i < F25519_SIZE; i++) {
  302. c += r[i];
  303. r[i] = (byte)c;
  304. c >>= 8;
  305. }
  306. }
  307. void lm_sub(byte* r, const byte* a, const byte* b)
  308. {
  309. word32 c = 0;
  310. int i;
  311. /* Calculate a + 2p - b, to avoid underflow */
  312. c = 218;
  313. for (i = 0; i + 1 < F25519_SIZE; i++) {
  314. c += 65280 + ((word32)a[i]) - ((word32)b[i]);
  315. r[i] = c;
  316. c >>= 8;
  317. }
  318. c += ((word32)a[31]) - ((word32)b[31]);
  319. r[31] = c & 127;
  320. c = (c >> 7) * 19;
  321. for (i = 0; i < F25519_SIZE; i++) {
  322. c += r[i];
  323. r[i] = c;
  324. c >>= 8;
  325. }
  326. }
  327. void lm_neg(byte* r, const byte* a)
  328. {
  329. word32 c = 0;
  330. int i;
  331. /* Calculate 2p - a, to avoid underflow */
  332. c = 218;
  333. for (i = 0; i + 1 < F25519_SIZE; i++) {
  334. c += 65280 - ((word32)a[i]);
  335. r[i] = c;
  336. c >>= 8;
  337. }
  338. c -= ((word32)a[31]);
  339. r[31] = c & 127;
  340. c = (c >> 7) * 19;
  341. for (i = 0; i < F25519_SIZE; i++) {
  342. c += r[i];
  343. r[i] = c;
  344. c >>= 8;
  345. }
  346. }
  347. void fe_mul__distinct(byte *r, const byte *a, const byte *b)
  348. {
  349. word32 c = 0;
  350. int i;
  351. for (i = 0; i < F25519_SIZE; i++) {
  352. int j;
  353. c >>= 8;
  354. for (j = 0; j <= i; j++)
  355. c += ((word32)a[j]) * ((word32)b[i - j]);
  356. for (; j < F25519_SIZE; j++)
  357. c += ((word32)a[j]) *
  358. ((word32)b[i + F25519_SIZE - j]) * 38;
  359. r[i] = c;
  360. }
  361. r[31] &= 127;
  362. c = (c >> 7) * 19;
  363. for (i = 0; i < F25519_SIZE; i++) {
  364. c += r[i];
  365. r[i] = c;
  366. c >>= 8;
  367. }
  368. }
  369. void lm_mul(byte *r, const byte* a, const byte *b)
  370. {
  371. byte tmp[F25519_SIZE];
  372. fe_mul__distinct(tmp, a, b);
  373. lm_copy(r, tmp);
  374. }
  375. void fe_mul_c(byte *r, const byte *a, word32 b)
  376. {
  377. word32 c = 0;
  378. int i;
  379. for (i = 0; i < F25519_SIZE; i++) {
  380. c >>= 8;
  381. c += b * ((word32)a[i]);
  382. r[i] = c;
  383. }
  384. r[31] &= 127;
  385. c >>= 7;
  386. c *= 19;
  387. for (i = 0; i < F25519_SIZE; i++) {
  388. c += r[i];
  389. r[i] = c;
  390. c >>= 8;
  391. }
  392. }
  393. void fe_inv__distinct(byte *r, const byte *x)
  394. {
  395. byte s[F25519_SIZE];
  396. int i;
  397. /* This is a prime field, so by Fermat's little theorem:
  398. *
  399. * x^(p-1) = 1 mod p
  400. *
  401. * Therefore, raise to (p-2) = 2^255-21 to get a multiplicative
  402. * inverse.
  403. *
  404. * This is a 255-bit binary number with the digits:
  405. *
  406. * 11111111... 01011
  407. *
  408. * We compute the result by the usual binary chain, but
  409. * alternate between keeping the accumulator in r and s, so as
  410. * to avoid copying temporaries.
  411. */
  412. /* 1 1 */
  413. fe_mul__distinct(s, x, x);
  414. fe_mul__distinct(r, s, x);
  415. /* 1 x 248 */
  416. for (i = 0; i < 248; i++) {
  417. fe_mul__distinct(s, r, r);
  418. fe_mul__distinct(r, s, x);
  419. }
  420. /* 0 */
  421. fe_mul__distinct(s, r, r);
  422. /* 1 */
  423. fe_mul__distinct(r, s, s);
  424. fe_mul__distinct(s, r, x);
  425. /* 0 */
  426. fe_mul__distinct(r, s, s);
  427. /* 1 */
  428. fe_mul__distinct(s, r, r);
  429. fe_mul__distinct(r, s, x);
  430. /* 1 */
  431. fe_mul__distinct(s, r, r);
  432. fe_mul__distinct(r, s, x);
  433. }
  434. void lm_invert(byte *r, const byte *x)
  435. {
  436. byte tmp[F25519_SIZE];
  437. fe_inv__distinct(tmp, x);
  438. lm_copy(r, tmp);
  439. }
  440. /* Raise x to the power of (p-5)/8 = 2^252-3, using s for temporary
  441. * storage.
  442. */
  443. static void exp2523(byte *r, const byte *x, byte *s)
  444. {
  445. int i;
  446. /* This number is a 252-bit number with the binary expansion:
  447. *
  448. * 111111... 01
  449. */
  450. /* 1 1 */
  451. fe_mul__distinct(r, x, x);
  452. fe_mul__distinct(s, r, x);
  453. /* 1 x 248 */
  454. for (i = 0; i < 248; i++) {
  455. fe_mul__distinct(r, s, s);
  456. fe_mul__distinct(s, r, x);
  457. }
  458. /* 0 */
  459. fe_mul__distinct(r, s, s);
  460. /* 1 */
  461. fe_mul__distinct(s, r, r);
  462. fe_mul__distinct(r, s, x);
  463. }
  464. void fe_sqrt(byte *r, const byte *a)
  465. {
  466. byte v[F25519_SIZE];
  467. byte i[F25519_SIZE];
  468. byte x[F25519_SIZE];
  469. byte y[F25519_SIZE];
  470. /* v = (2a)^((p-5)/8) [x = 2a] */
  471. fe_mul_c(x, a, 2);
  472. exp2523(v, x, y);
  473. /* i = 2av^2 - 1 */
  474. fe_mul__distinct(y, v, v);
  475. fe_mul__distinct(i, x, y);
  476. fe_load(y, 1);
  477. lm_sub(i, i, y);
  478. /* r = avi */
  479. fe_mul__distinct(x, v, a);
  480. fe_mul__distinct(r, x, i);
  481. }
  482. #endif /* CURVE25519_SMALL || ED25519_SMALL */
  483. #endif /* HAVE_CURVE25519 || HAVE_ED25519 */