bignum.c 61 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848
  1. /**
  2. * Copyright (c) 2013-2014 Tomas Dzetkulic
  3. * Copyright (c) 2013-2014 Pavol Rusnak
  4. * Copyright (c) 2015 Jochen Hoenicke
  5. * Copyright (c) 2016 Alex Beregszaszi
  6. *
  7. * Permission is hereby granted, free of charge, to any person obtaining
  8. * a copy of this software and associated documentation files (the "Software"),
  9. * to deal in the Software without restriction, including without limitation
  10. * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11. * and/or sell copies of the Software, and to permit persons to whom the
  12. * Software is furnished to do so, subject to the following conditions:
  13. *
  14. * The above copyright notice and this permission notice shall be included
  15. * in all copies or substantial portions of the Software.
  16. *
  17. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18. * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  20. * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
  21. * OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  22. * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  23. * OTHER DEALINGS IN THE SOFTWARE.
  24. */
  25. #include "bignum.h"
  26. #include <assert.h>
  27. #include <stdint.h>
  28. #include <stdio.h>
  29. #include <string.h>
  30. #include "memzero.h"
  31. #include "script.h"
  32. /*
  33. This library implements 256-bit numbers arithmetic.
  34. An unsigned 256-bit number is represented by a bignum256 structure, that is an
  35. array of nine 32-bit values called limbs. Limbs are digits of the number in
  36. the base 2**29 representation in the little endian order. This means that
  37. bignum256 x;
  38. represents the value
  39. sum([x[i] * 2**(29*i) for i in range(9)).
  40. A limb of a bignum256 is *normalized* iff it's less than 2**29.
  41. A bignum256 is *normalized* iff every its limb is normalized.
  42. A number is *fully reduced modulo p* iff it is less than p.
  43. A number is *partly reduced modulo p* iff is is less than 2*p.
  44. The number p is usually a prime number such that 2^256 - 2^224 <= p <= 2^256.
  45. All functions except bn_fast_mod expect that all their bignum256 inputs are
  46. normalized. (The function bn_fast_mod allows the input number to have the
  47. most significant limb unnormalized). All bignum256 outputs of all functions
  48. are guaranteed to be normalized.
  49. A number can be partly reduced with bn_fast_mod, a partly reduced number can
  50. be fully reduced with bn_mod.
  51. A function has *constant control flow with regard to its argument* iff the
  52. order in which instructions of the function are executed doesn't depend on the
  53. value of the argument.
  54. A function has *constant memory access flow with regard to its argument* iff
  55. the memory addresses that are acessed and the order in which they are accessed
  56. don't depend on the value of the argument.
  57. A function *has contant control (memory access) flow* iff it has constant
  58. control (memory access) flow with regard to all its arguments.
  59. The following function has contant control flow with regard to its arugment
  60. n, however is doesn't have constant memory access flow with regard to it:
  61. void (int n, int *a) }
  62. a[0] = 0;
  63. a[n] = 0; // memory address reveals the value of n
  64. }
  65. Unless stated otherwise all functions are supposed to have both constant
  66. control flow and constant memory access flow.
  67. */
  68. #define BN_MAX_DECIMAL_DIGITS \
  69. 79 // floor(log(2**(LIMBS * BITS_PER_LIMB), 10)) + 1
  70. // out_number = (bignum256) in_number
  71. // Assumes in_number is a raw bigendian 256-bit number
  72. // Guarantees out_number is normalized
  73. void bn_read_be(const uint8_t *in_number, bignum256 *out_number) {
  74. uint32_t temp = 0;
  75. for (int i = 0; i < BN_LIMBS - 1; i++) {
  76. uint32_t limb = read_be(in_number + (BN_LIMBS - 2 - i) * 4);
  77. temp |= limb << (BN_EXTRA_BITS * i);
  78. out_number->val[i] = temp & BN_LIMB_MASK;
  79. temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
  80. }
  81. out_number->val[BN_LIMBS - 1] = temp;
  82. }
  83. // out_number = (256BE) in_number
  84. // Assumes in_number < 2**256
  85. // Guarantess out_number is a raw bigendian 256-bit number
  86. void bn_write_be(const bignum256 *in_number, uint8_t *out_number) {
  87. uint32_t temp = in_number->val[BN_LIMBS - 1];
  88. for (int i = BN_LIMBS - 2; i >= 0; i--) {
  89. uint32_t limb = in_number->val[i];
  90. temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) |
  91. (limb >> (BN_EXTRA_BITS * i));
  92. write_be(out_number + (BN_LIMBS - 2 - i) * 4, temp);
  93. temp = limb;
  94. }
  95. }
  96. // out_number = (bignum256) in_number
  97. // Assumes in_number is a raw little endian 256-bit number
  98. // Guarantees out_number is normalized
  99. void bn_read_le(const uint8_t *in_number, bignum256 *out_number) {
  100. uint32_t temp = 0;
  101. for (int i = 0; i < BN_LIMBS - 1; i++) {
  102. uint32_t limb = read_le(in_number + i * 4);
  103. temp |= limb << (BN_EXTRA_BITS * i);
  104. out_number->val[i] = temp & BN_LIMB_MASK;
  105. temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
  106. }
  107. out_number->val[BN_LIMBS - 1] = temp;
  108. }
  109. // out_number = (256LE) in_number
  110. // Assumes in_number < 2**256
  111. // Guarantess out_number is a raw little endian 256-bit number
  112. void bn_write_le(const bignum256 *in_number, uint8_t *out_number) {
  113. uint32_t temp = in_number->val[BN_LIMBS - 1];
  114. for (int i = BN_LIMBS - 2; i >= 0; i--) {
  115. uint32_t limb = in_number->val[i];
  116. temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) |
  117. (limb >> (BN_EXTRA_BITS * i));
  118. write_le(out_number + i * 4, temp);
  119. temp = limb;
  120. }
  121. }
  122. // out_number = (bignum256) in_number
  123. // Guarantees out_number is normalized
  124. void bn_read_uint32(uint32_t in_number, bignum256 *out_number) {
  125. out_number->val[0] = in_number & BN_LIMB_MASK;
  126. out_number->val[1] = in_number >> BN_BITS_PER_LIMB;
  127. for (uint32_t i = 2; i < BN_LIMBS; i++) out_number->val[i] = 0;
  128. }
  129. // out_number = (bignum256) in_number
  130. // Guarantees out_number is normalized
  131. void bn_read_uint64(uint64_t in_number, bignum256 *out_number) {
  132. out_number->val[0] = in_number & BN_LIMB_MASK;
  133. out_number->val[1] = (in_number >>= BN_BITS_PER_LIMB) & BN_LIMB_MASK;
  134. out_number->val[2] = in_number >> BN_BITS_PER_LIMB;
  135. for (uint32_t i = 3; i < BN_LIMBS; i++) out_number->val[i] = 0;
  136. }
  137. // Returns the bitsize of x
  138. // Assumes x is normalized
  139. // The function doesn't have neither constant control flow nor constant memory
  140. // access flow
  141. int bn_bitcount(const bignum256 *x) {
  142. for (int i = BN_LIMBS - 1; i >= 0; i--) {
  143. uint32_t limb = x->val[i];
  144. if (limb != 0) {
  145. // __builtin_clz returns the number of leading zero bits starting at the
  146. // most significant bit position
  147. return i * BN_BITS_PER_LIMB + (32 - __builtin_clz(limb));
  148. }
  149. }
  150. return 0;
  151. }
  152. // Returns the number of decimal digits of x; if x is 0, returns 1
  153. // Assumes x is normalized
  154. // The function doesn't have neither constant control flow nor constant memory
  155. // access flow
  156. unsigned int bn_digitcount(const bignum256 *x) {
  157. bignum256 val = {0};
  158. bn_copy(x, &val);
  159. unsigned int digits = 1;
  160. for (unsigned int i = 0; i < BN_MAX_DECIMAL_DIGITS; i += 3) {
  161. uint32_t limb = 0;
  162. bn_divmod1000(&val, &limb);
  163. if (limb >= 100) {
  164. digits = i + 3;
  165. } else if (limb >= 10) {
  166. digits = i + 2;
  167. } else if (limb >= 1) {
  168. digits = i + 1;
  169. }
  170. }
  171. memzero(&val, sizeof(val));
  172. return digits;
  173. }
  174. // x = 0
  175. // Guarantees x is normalized
  176. void bn_zero(bignum256 *x) {
  177. for (int i = 0; i < BN_LIMBS; i++) {
  178. x->val[i] = 0;
  179. }
  180. }
  181. // x = 1
  182. // Guarantees x is normalized
  183. void bn_one(bignum256 *x) {
  184. x->val[0] = 1;
  185. for (int i = 1; i < BN_LIMBS; i++) {
  186. x->val[i] = 0;
  187. }
  188. }
  189. // Returns x == 0
  190. // Assumes x is normalized
  191. int bn_is_zero(const bignum256 *x) {
  192. uint32_t result = 0;
  193. for (int i = 0; i < BN_LIMBS; i++) {
  194. result |= x->val[i];
  195. }
  196. return !result;
  197. }
  198. // Returns x == 1
  199. // Assumes x is normalized
  200. int bn_is_one(const bignum256 *x) {
  201. uint32_t result = x->val[0] ^ 1;
  202. for (int i = 1; i < BN_LIMBS; i++) {
  203. result |= x->val[i];
  204. }
  205. return !result;
  206. }
  207. // Returns x < y
  208. // Assumes x, y are normalized
  209. int bn_is_less(const bignum256 *x, const bignum256 *y) {
  210. uint32_t res1 = 0;
  211. uint32_t res2 = 0;
  212. for (int i = BN_LIMBS - 1; i >= 0; i--) {
  213. res1 = (res1 << 1) | (x->val[i] < y->val[i]);
  214. res2 = (res2 << 1) | (x->val[i] > y->val[i]);
  215. }
  216. return res1 > res2;
  217. }
  218. // Returns x == y
  219. // Assumes x, y are normalized
  220. int bn_is_equal(const bignum256 *x, const bignum256 *y) {
  221. uint32_t result = 0;
  222. for (int i = 0; i < BN_LIMBS; i++) {
  223. result |= x->val[i] ^ y->val[i];
  224. }
  225. return !result;
  226. }
  227. // res = cond if truecase else falsecase
  228. // Assumes cond is either 0 or 1
  229. // Works properly even if &res == &truecase or &res == &falsecase or
  230. // &truecase == &falsecase or &res == &truecase == &falsecase
  231. void bn_cmov(bignum256 *res, volatile uint32_t cond, const bignum256 *truecase,
  232. const bignum256 *falsecase) {
  233. // Intentional use of bitwise OR operator to ensure constant-time
  234. assert((int)(cond == 1) | (int)(cond == 0));
  235. uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
  236. uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
  237. for (int i = 0; i < BN_LIMBS; i++) {
  238. res->val[i] = (truecase->val[i] & tmask) | (falsecase->val[i] & fmask);
  239. }
  240. }
  241. // x = -x % prime if cond else x,
  242. // Explicitly x = (3 * prime - x if x > prime else 2 * prime - x) if cond else
  243. // else (x if x > prime else x + prime)
  244. // Assumes x is normalized and partly reduced
  245. // Assumes cond is either 1 or 0
  246. // Guarantees x is normalized
  247. // Assumes prime is normalized and
  248. // 0 < prime < 2**260 == 2**(BITS_PER_LIMB * LIMBS - 1)
  249. void bn_cnegate(volatile uint32_t cond, bignum256 *x, const bignum256 *prime) {
  250. // Intentional use of bitwise OR operator to ensure constant time
  251. assert((int)(cond == 1) | (int)(cond == 0));
  252. uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
  253. uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
  254. bn_mod(x, prime);
  255. // x < prime
  256. uint32_t acc1 = 1;
  257. uint32_t acc2 = 0;
  258. for (int i = 0; i < BN_LIMBS; i++) {
  259. acc1 += (BN_BASE - 1) + 2 * prime->val[i] - x->val[i];
  260. // acc1 neither overflows 32 bits nor underflows 0
  261. // Proof:
  262. // acc1 + (BASE - 1) + 2 * prime[i] - x[i]
  263. // >= (BASE - 1) - x >= (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
  264. // == 0
  265. // acc1 + (BASE - 1) + 2 * prime[i] - x[i]
  266. // <= acc1 + (BASE - 1) + 2 * prime[i]
  267. // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1) +
  268. // (2**BITS_PER_LIMB - 1)
  269. // == 7 + 3 * 2**29 < 2**32
  270. acc2 += prime->val[i] + x->val[i];
  271. // acc2 doesn't overflow 32 bits
  272. // Proof:
  273. // acc2 + prime[i] + x[i]
  274. // <= 2**(32 - BITS_PER_LIMB) - 1 + 2 * (2**BITS_PER_LIMB - 1)
  275. // == 2**(32 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB + 1) - 2
  276. // == 2**30 + 5 < 2**32
  277. // x = acc1 & LIMB_MASK if cond else acc2 & LIMB_MASK
  278. x->val[i] = ((acc1 & tmask) | (acc2 & fmask)) & BN_LIMB_MASK;
  279. acc1 >>= BN_BITS_PER_LIMB;
  280. // acc1 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  281. // acc1 == 2**(BITS_PER_LIMB * (i + 1)) + 2 * prime[:i + 1] - x[:i + 1]
  282. // >> BITS_PER_LIMB * (i + 1)
  283. acc2 >>= BN_BITS_PER_LIMB;
  284. // acc2 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  285. // acc2 == prime[:i + 1] + x[:i + 1] >> BITS_PER_LIMB * (i + 1)
  286. }
  287. // assert(acc1 == 1); // assert prime <= 2**260
  288. // assert(acc2 == 0);
  289. // clang-format off
  290. // acc1 == 1
  291. // Proof:
  292. // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
  293. // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
  294. // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * prime >> BITS_PER_LIMB * LIMBS
  295. // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) >> BITS_PER_LIMB * LIMBS
  296. // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 2 >> BITS_PER_LIMB * LIMBS
  297. // == 1
  298. // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
  299. // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
  300. // >= 2**(BITS_PER_LIMB * LIMBS) + 0 >> BITS_PER_LIMB * LIMBS
  301. // == 1
  302. // acc2 == 0
  303. // Proof:
  304. // acc2 == prime[:LIMBS] + x[:LIMBS] >> BITS_PER_LIMB * LIMBS
  305. // == prime + x >> BITS_PER_LIMB * LIMBS
  306. // <= 2 * prime - 1 >> BITS_PER_LIMB * LIMBS
  307. // <= 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) - 1 >> 261
  308. // == 2**(BITS_PER_LIMB * LIMBS) - 3 >> BITS_PER_LIMB * LIMBS
  309. // == 0
  310. // clang-format on
  311. }
  312. // x <<= 1
  313. // Assumes x is normalized, x < 2**260 == 2**(LIMBS*BITS_PER_LIMB - 1)
  314. // Guarantees x is normalized
  315. void bn_lshift(bignum256 *x) {
  316. for (int i = BN_LIMBS - 1; i > 0; i--) {
  317. x->val[i] = ((x->val[i] << 1) & BN_LIMB_MASK) |
  318. (x->val[i - 1] >> (BN_BITS_PER_LIMB - 1));
  319. }
  320. x->val[0] = (x->val[0] << 1) & BN_LIMB_MASK;
  321. }
  322. // x >>= 1, i.e. x = floor(x/2)
  323. // Assumes x is normalized
  324. // Guarantees x is normalized
  325. // If x is partly reduced (fully reduced) modulo prime,
  326. // guarantess x will be partly reduced (fully reduced) modulo prime
  327. void bn_rshift(bignum256 *x) {
  328. for (int i = 0; i < BN_LIMBS - 1; i++) {
  329. x->val[i] =
  330. (x->val[i] >> 1) | ((x->val[i + 1] & 1) << (BN_BITS_PER_LIMB - 1));
  331. }
  332. x->val[BN_LIMBS - 1] >>= 1;
  333. }
  334. // Sets i-th least significant bit (counting from zero)
  335. // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
  336. // Guarantees x is normalized
  337. // The function has constant control flow but not constant memory access flow
  338. // with regard to i
  339. void bn_setbit(bignum256 *x, uint16_t i) {
  340. assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
  341. x->val[i / BN_BITS_PER_LIMB] |= (1u << (i % BN_BITS_PER_LIMB));
  342. }
  343. // clears i-th least significant bit (counting from zero)
  344. // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
  345. // Guarantees x is normalized
  346. // The function has constant control flow but not constant memory access flow
  347. // with regard to i
  348. void bn_clearbit(bignum256 *x, uint16_t i) {
  349. assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
  350. x->val[i / BN_BITS_PER_LIMB] &= ~(1u << (i % BN_BITS_PER_LIMB));
  351. }
  352. // returns i-th least significant bit (counting from zero)
  353. // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
  354. // The function has constant control flow but not constant memory access flow
  355. // with regard to i
  356. uint32_t bn_testbit(const bignum256 *x, uint16_t i) {
  357. assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
  358. return (x->val[i / BN_BITS_PER_LIMB] >> (i % BN_BITS_PER_LIMB)) & 1;
  359. }
  360. // res = x ^ y
  361. // Assumes x, y are normalized
  362. // Guarantees res is normalized
  363. // Works properly even if &res == &x or &res == &y or &res == &x == &y
  364. void bn_xor(bignum256 *res, const bignum256 *x, const bignum256 *y) {
  365. for (int i = 0; i < BN_LIMBS; i++) {
  366. res->val[i] = x->val[i] ^ y->val[i];
  367. }
  368. }
  369. // x = x / 2 % prime
  370. // Explicitly x = x / 2 if is_even(x) else (x + prime) / 2
  371. // Assumes x is normalized, x + prime < 261 == LIMBS * BITS_PER_LIMB
  372. // Guarantees x is normalized
  373. // If x is partly reduced (fully reduced) modulo prime,
  374. // guarantess x will be partly reduced (fully reduced) modulo prime
  375. // Assumes prime is an odd number and normalized
  376. void bn_mult_half(bignum256 *x, const bignum256 *prime) {
  377. // x = x / 2 if is_even(x) else (x + prime) / 2
  378. uint32_t x_is_odd_mask =
  379. -(x->val[0] & 1); // x_is_odd_mask = 0xFFFFFFFF if is_odd(x) else 0
  380. uint32_t acc = (x->val[0] + (prime->val[0] & x_is_odd_mask)) >> 1;
  381. // acc < 2**BITS_PER_LIMB
  382. // Proof:
  383. // acc == x[0] + prime[0] & x_is_odd_mask >> 1
  384. // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1) >> 1
  385. // == 2**(BITS_PER_LIMB + 1) - 2 >> 1
  386. // < 2**(BITS_PER_LIMB)
  387. for (int i = 0; i < BN_LIMBS - 1; i++) {
  388. uint32_t temp = (x->val[i + 1] + (prime->val[i + 1] & x_is_odd_mask));
  389. // temp < 2**(BITS_PER_LIMB + 1)
  390. // Proof:
  391. // temp == x[i + 1] + val[i + 1] & x_is_odd_mask
  392. // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1)
  393. // < 2**(BITS_PER_LIMB + 1)
  394. acc += (temp & 1) << (BN_BITS_PER_LIMB - 1);
  395. // acc doesn't overflow 32 bits
  396. // Proof:
  397. // acc + (temp & 1 << BITS_PER_LIMB - 1)
  398. // <= 2**(BITS_PER_LIMB + 1) + 2**(BITS_PER_LIMB - 1)
  399. // <= 2**30 + 2**28 < 2**32
  400. x->val[i] = acc & BN_LIMB_MASK;
  401. acc >>= BN_BITS_PER_LIMB;
  402. acc += temp >> 1;
  403. // acc < 2**(BITS_PER_LIMB + 1)
  404. // Proof:
  405. // acc + (temp >> 1)
  406. // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB + 1) - 1 >> 1)
  407. // == 7 + 2**(BITS_PER_LIMB) - 1 < 2**(BITS_PER_LIMB + 1)
  408. // acc == x[:i+2]+(prime[:i+2] & x_is_odd_mask) >> BITS_PER_LIMB * (i+1)
  409. }
  410. x->val[BN_LIMBS - 1] = acc;
  411. // assert(acc >> BITS_PER_LIMB == 0);
  412. // acc >> BITS_PER_LIMB == 0
  413. // Proof:
  414. // acc
  415. // == x[:LIMBS] + (prime[:LIMBS] & x_is_odd_mask) >> BITS_PER_LIMB*LIMBS
  416. // == x + (prime & x_is_odd_mask) >> BITS_PER_LIMB * LIMBS
  417. // <= x + prime >> BITS_PER_LIMB * LIMBS
  418. // <= 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
  419. // == 0
  420. }
  421. // x = x * k % prime
  422. // Assumes x is normalized, 0 <= k <= 8 = 2**(32 - BITS_PER_LIMB)
  423. // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
  424. // Guarantees x is normalized and partly reduced modulo prime
  425. void bn_mult_k(bignum256 *x, uint8_t k, const bignum256 *prime) {
  426. assert(k <= 8);
  427. for (int i = 0; i < BN_LIMBS; i++) {
  428. x->val[i] = k * x->val[i];
  429. // x[i] doesn't overflow 32 bits
  430. // k * x[i] <= 2**(32 - BITS_PER_LIMB) * (2**BITS_PER_LIMB - 1)
  431. // < 2**(32 - BITS_PER_LIMB) * 2**BITS_PER_LIMB == 2**32
  432. }
  433. bn_fast_mod(x, prime);
  434. }
  435. // Reduces partly reduced x modulo prime
  436. // Explicitly x = x if x < prime else x - prime
  437. // Assumes x is partly reduced modulo prime
  438. // Guarantees x is fully reduced modulo prime
  439. // Assumes prime is nonzero and normalized
  440. void bn_mod(bignum256 *x, const bignum256 *prime) {
  441. uint32_t x_less_prime = bn_is_less(x, prime);
  442. bignum256 temp = {0};
  443. bn_subtract(x, prime, &temp);
  444. bn_cmov(x, x_less_prime, x, &temp);
  445. memzero(&temp, sizeof(temp));
  446. }
  447. // Auxiliary function for bn_multiply
  448. // res = k * x
  449. // Assumes k and x are normalized
  450. // Guarantees res is normalized 18 digit little endian number in base 2**29
  451. void bn_multiply_long(const bignum256 *k, const bignum256 *x,
  452. uint32_t res[2 * BN_LIMBS]) {
  453. // Uses long multiplication in base 2**29, see
  454. // https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
  455. uint64_t acc = 0;
  456. // compute lower half
  457. for (int i = 0; i < BN_LIMBS; i++) {
  458. for (int j = 0; j <= i; j++) {
  459. acc += k->val[j] * (uint64_t)x->val[i - j];
  460. // acc doesn't overflow 64 bits
  461. // Proof:
  462. // acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
  463. // <= (2**(64 - BITS_PER_LIMB) - 1) +
  464. // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
  465. // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
  466. // <= 2**35 + 9 * 2**58 < 2**64
  467. }
  468. res[i] = acc & BN_LIMB_MASK;
  469. acc >>= BN_BITS_PER_LIMB;
  470. // acc <= 2**35 - 1 == 2**(64 - BITS_PER_LIMB) - 1
  471. }
  472. // compute upper half
  473. for (int i = BN_LIMBS; i < 2 * BN_LIMBS - 1; i++) {
  474. for (int j = i - BN_LIMBS + 1; j < BN_LIMBS; j++) {
  475. acc += k->val[j] * (uint64_t)x->val[i - j];
  476. // acc doesn't overflow 64 bits
  477. // Proof:
  478. // acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
  479. // <= (2**(64 - BITS_PER_LIMB) - 1)
  480. // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
  481. // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
  482. // <= 2**35 + 9 * 2**58 < 2**64
  483. }
  484. res[i] = acc & (BN_BASE - 1);
  485. acc >>= BN_BITS_PER_LIMB;
  486. // acc < 2**35 == 2**(64 - BITS_PER_LIMB)
  487. }
  488. res[2 * BN_LIMBS - 1] = acc;
  489. }
  490. // Auxiliary function for bn_multiply
  491. // Assumes 0 <= d <= 8 == LIMBS - 1
  492. // Assumes res is normalized and res < 2**(256 + 29*d + 31)
  493. // Guarantess res in normalized and res < 2 * prime * 2**(29*d)
  494. // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
  495. void bn_multiply_reduce_step(uint32_t res[2 * BN_LIMBS], const bignum256 *prime,
  496. uint32_t d) {
  497. // clang-format off
  498. // Computes res = res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d)
  499. // res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d) < 2 * prime * 2**(BITS_PER_LIMB * d)
  500. // Proof:
  501. // res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * prime
  502. // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - (2**256 - prime))
  503. // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * 2**256 + res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
  504. // == (res % 2**(256 + BITS_PER_LIMB * d)) + res // (2**256 + BITS_PER_LIMB * d) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
  505. // <= (2**(256 + 29*d + 31) % 2**(256 + 29*d)) + (2**(256 + 29*d + 31) - 1) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
  506. // <= 2**(256 + 29*d) + 2**(256 + 29*d + 31) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
  507. // == 2**(256 + 29*d) + 2**31 * 2**(29*d) * (2**256 - prime)
  508. // == 2**(29*d) * (2**256 + 2**31 * (2*256 - prime))
  509. // <= 2**(29*d) * (2**256 + 2**31 * 2*224)
  510. // <= 2**(29*d) * (2**256 + 2**255)
  511. // <= 2**(29*d) * 2 * (2**256 - 2**224)
  512. // <= 2 * prime * 2**(29*d)
  513. // clang-format on
  514. uint32_t coef =
  515. (res[d + BN_LIMBS - 1] >> (256 - (BN_LIMBS - 1) * BN_BITS_PER_LIMB)) +
  516. (res[d + BN_LIMBS] << ((BN_LIMBS * BN_BITS_PER_LIMB) - 256));
  517. // coef == res // 2**(256 + BITS_PER_LIMB * d)
  518. // coef < 2**31
  519. // Proof:
  520. // coef == res // 2**(256 + BITS_PER_LIMB * d)
  521. // < 2**(256 + 29 * d + 31) // 2**(256 + 29 * d)
  522. // == 2**31
  523. const int shift = 31;
  524. uint64_t acc = 1ull << shift;
  525. for (int i = 0; i < BN_LIMBS; i++) {
  526. acc += (((uint64_t)(BN_BASE - 1)) << shift) + res[d + i] -
  527. prime->val[i] * (uint64_t)coef;
  528. // acc neither overflow 64 bits nor underflow zero
  529. // Proof:
  530. // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
  531. // >= ((BASE - 1) << shift) - prime[i] * coef
  532. // == 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) *
  533. // (2**31 - 1)
  534. // == (2**shift - 2**31 + 1) * (2**BITS_PER_LIMB - 1)
  535. // == (2**31 - 2**31 + 1) * (2**29 - 1)
  536. // == 2**29 - 1 > 0
  537. // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
  538. // <= acc + ((BASE - 1) << shift) + res[d+i]
  539. // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
  540. // + (2*BITS_PER_LIMB - 1)
  541. // == (2**(64 - BITS_PER_LIMB) - 1) + (2**shift + 1) *
  542. // (2**BITS_PER_LIMB - 1)
  543. // == (2**35 - 1) + (2**31 + 1) * (2**29 - 1)
  544. // <= 2**35 + 2**60 + 2**29 < 2**64
  545. res[d + i] = acc & BN_LIMB_MASK;
  546. acc >>= BN_BITS_PER_LIMB;
  547. // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
  548. // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + res[d : d + i + 1]
  549. // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
  550. }
  551. // acc += (((uint64_t)(BASE - 1)) << shift) + res[d + LIMBS];
  552. // acc >>= BITS_PER_LIMB;
  553. // assert(acc <= 1ul << shift);
  554. // clang-format off
  555. // acc == 1 << shift
  556. // Proof:
  557. // acc
  558. // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS] >> BITS_PER_LIMB * (LIMBS + 1)
  559. // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime >> BITS_PER_LIMB * (LIMBS + 1)
  560. // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[d : d + LIMBS + 1] - coef * prime) >> BITS_PER_LIMB * (LIMBS + 1)
  561. // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[:d] + BASE**d * res[d : d + LIMBS + 1] - BASE**d * coef * prime)//BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
  562. // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res - BASE**d * coef * prime) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
  563. // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (2 * prime * BASE**d) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
  564. // <= (1 << 321) + 2 * 2**256 >> 290
  565. // == 1 << 31 == 1 << shift
  566. // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS + 1] >> BITS_PER_LIMB * (LIMBS + 1)
  567. // >= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + 0 >> BITS_PER_LIMB * (LIMBS + 1)
  568. // == 1 << shift
  569. // clang-format on
  570. res[d + BN_LIMBS] = 0;
  571. }
  572. // Auxiliary function for bn_multiply
  573. // Partly reduces res and stores both in x and res
  574. // Assumes res in normalized and res < 2**519
  575. // Guarantees x is normalized and partly reduced modulo prime
  576. // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
  577. void bn_multiply_reduce(bignum256 *x, uint32_t res[2 * BN_LIMBS],
  578. const bignum256 *prime) {
  579. for (int i = BN_LIMBS - 1; i >= 0; i--) {
  580. // res < 2**(256 + 29*i + 31)
  581. // Proof:
  582. // if i == LIMBS - 1:
  583. // res < 2**519
  584. // == 2**(256 + 29 * 8 + 31)
  585. // == 2**(256 + 29 * (LIMBS - 1) + 31)
  586. // else:
  587. // res < 2 * prime * 2**(29 * (i + 1))
  588. // <= 2**256 * 2**(29*i + 29) < 2**(256 + 29*i + 31)
  589. bn_multiply_reduce_step(res, prime, i);
  590. }
  591. for (int i = 0; i < BN_LIMBS; i++) {
  592. x->val[i] = res[i];
  593. }
  594. }
  595. // x = k * x % prime
  596. // Assumes k, x are normalized, k * x < 2**519
  597. // Guarantees x is normalized and partly reduced modulo prime
  598. // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
  599. void bn_multiply(const bignum256 *k, bignum256 *x, const bignum256 *prime) {
  600. uint32_t res[2 * BN_LIMBS] = {0};
  601. bn_multiply_long(k, x, res);
  602. bn_multiply_reduce(x, res, prime);
  603. memzero(res, sizeof(res));
  604. }
  605. // Partly reduces x modulo prime
  606. // Assumes limbs of x except the last (the most significant) one are normalized
  607. // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
  608. // Guarantees x is normalized and partly reduced modulo prime
  609. void bn_fast_mod(bignum256 *x, const bignum256 *prime) {
  610. // Computes x = x - (x // 2**256) * prime
  611. // x < 2**((LIMBS - 1) * BITS_PER_LIMB + 32) == 2**264
  612. // x - (x // 2**256) * prime < 2 * prime
  613. // Proof:
  614. // x - (x // 2**256) * prime
  615. // == x - (x // 2**256) * (2**256 - (2**256 - prime))
  616. // == x - ((x // 2**256) * 2**256) + (x // 2**256) * (2**256 - prime)
  617. // == (x % prime) + (x // 2**256) * (2**256 - prime)
  618. // <= prime - 1 + (2**264 // 2**256) * (2**256 - prime)
  619. // <= 2**256 + 2**8 * 2**224 == 2**256 + 2**232
  620. // < 2 * (2**256 - 2**224)
  621. // <= 2 * prime
  622. // x - (x // 2**256 - 1) * prime < 2 * prime
  623. // Proof:
  624. // x - (x // 2**256) * prime + prime
  625. // == x - (x // 2**256) * (2**256 - (2**256 - prime)) + prime
  626. // == x - ((x//2**256) * 2**256) + (x//2**256) * (2**256 - prime) + prime
  627. // == (x % prime) + (x // 2**256) * (2**256 - prime) + prime
  628. // <= 2 * prime - 1 + (2**264 // 2**256) * (2**256 - prime)
  629. // <= 2 * prime + 2**8 * 2**224 == 2**256 + 2**232 + 2**256 - 2**224
  630. // < 2 * (2**256 - 2**224)
  631. // <= 2 * prime
  632. uint32_t coef =
  633. x->val[BN_LIMBS - 1] >> (256 - ((BN_LIMBS - 1) * BN_BITS_PER_LIMB));
  634. // clang-format off
  635. // coef == x // 2**256
  636. // 0 <= coef < 2**((LIMBS - 1) * BITS_PER_LIMB + 32 - 256) == 256
  637. // Proof:
  638. //* Let x[[a : b] be the number consisting of a-th to (b-1)-th bit of the number x.
  639. // x[LIMBS - 1] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
  640. // == x[[(LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
  641. // == x[[256 - ((LIMBS - 1) * BITS_PER_LIMB) + (LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]]
  642. // == x[[256 : (LIMBS - 1) * BITS_PER_LIMB + 32]]
  643. // == x[[256 : 264]] == x // 2**256
  644. // clang-format on
  645. const int shift = 8;
  646. uint64_t acc = 1ull << shift;
  647. for (int i = 0; i < BN_LIMBS; i++) {
  648. acc += (((uint64_t)(BN_BASE - 1)) << shift) + x->val[i] -
  649. prime->val[i] * (uint64_t)coef;
  650. // acc neither overflows 64 bits nor underflows 0
  651. // Proof:
  652. // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
  653. // >= (BASE - 1 << shift) - prime[i] * coef
  654. // >= 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) * 255
  655. // == (2**shift - 255) * (2**BITS_PER_LIMB - 1)
  656. // == (2**8 - 255) * (2**29 - 1) == 2**29 - 1 >= 0
  657. // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
  658. // <= acc + ((BASE - 1) << shift) + x[i]
  659. // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
  660. // + (2**32 - 1)
  661. // == (2**35 - 1) + 2**8 * (2**29 - 1) + 2**32
  662. // < 2**35 + 2**37 + 2**32 < 2**64
  663. x->val[i] = acc & BN_LIMB_MASK;
  664. acc >>= BN_BITS_PER_LIMB;
  665. // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
  666. // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + x[:i + 1]
  667. // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
  668. }
  669. // assert(acc == 1 << shift);
  670. // clang-format off
  671. // acc == 1 << shift
  672. // Proof:
  673. // acc
  674. // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
  675. // == (1 << BITS_PER_LIMB * LIMBS + shift) + (x - coef * prime) >> BITS_PER_LIMB * LIMBS
  676. // <= (1 << BITS_PER_LIMB * LIMBS + shift) + (2 * prime) >> BITS_PER_LIMB * LIMBS
  677. // <= (1 << BITS_PER_LIMB * LIMBS + shift) + 2 * 2**256 >> BITS_PER_LIMB * LIMBS
  678. // <= 2**269 + 2**257 >> 2**261
  679. // <= 1 << 8 == 1 << shift
  680. // acc
  681. // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
  682. // >= (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
  683. // == (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
  684. // <= 1 << 8 == 1 << shift
  685. // clang-format on
  686. }
  687. // res = x**e % prime
  688. // Assumes both x and e are normalized, x < 2**259
  689. // Guarantees res is normalized and partly reduced modulo prime
  690. // Works properly even if &x == &res
  691. // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
  692. // The function doesn't have neither constant control flow nor constant memory
  693. // access flow with regard to e
  694. void bn_power_mod(const bignum256 *x, const bignum256 *e,
  695. const bignum256 *prime, bignum256 *res) {
  696. // Uses iterative right-to-left exponentiation by squaring, see
  697. // https://en.wikipedia.org/wiki/Modular_exponentiation#Right-to-left_binary_method
  698. bignum256 acc = {0};
  699. bn_copy(x, &acc);
  700. bn_one(res);
  701. for (int i = 0; i < BN_LIMBS; i++) {
  702. uint32_t limb = e->val[i];
  703. for (int j = 0; j < BN_BITS_PER_LIMB; j++) {
  704. // Break if the following bits of the last limb are zero
  705. if (i == BN_LIMBS - 1 && limb == 0) break;
  706. if (limb & 1)
  707. // acc * res < 2**519
  708. // Proof:
  709. // acc * res <= max(2**259 - 1, 2 * prime) * (2 * prime)
  710. // == max(2**259 - 1, 2**257) * 2**257 < 2**259 * 2**257
  711. // == 2**516 < 2**519
  712. bn_multiply(&acc, res, prime);
  713. limb >>= 1;
  714. // acc * acc < 2**519
  715. // Proof:
  716. // acc * acc <= max(2**259 - 1, 2 * prime)**2
  717. // <= (2**259)**2 == 2**518 < 2**519
  718. bn_multiply(&acc, &acc, prime);
  719. }
  720. // acc == x**(e[:i + 1]) % prime
  721. }
  722. memzero(&acc, sizeof(acc));
  723. }
  724. // x = sqrt(x) % prime
  725. // Explicitly x = x**((prime+1)/4) % prime
  726. // The other root is -sqrt(x)
  727. // Assumes x is normalized, x < 2**259 and quadratic residuum mod prime
  728. // Assumes prime is a prime number, prime % 4 == 3, it is normalized and
  729. // 2**256 - 2**224 <= prime <= 2**256
  730. // Guarantees x is normalized and fully reduced modulo prime
  731. // The function doesn't have neither constant control flow nor constant memory
  732. // access flow with regard to prime
  733. void bn_sqrt(bignum256 *x, const bignum256 *prime) {
  734. // Uses the Lagrange formula for the primes of the special form, see
  735. // http://en.wikipedia.org/wiki/Quadratic_residue#Prime_or_prime_power_modulus
  736. // If prime % 4 == 3, then sqrt(x) % prime == x**((prime+1)//4) % prime
  737. assert(prime->val[BN_LIMBS - 1] % 4 == 3);
  738. // e = (prime + 1) // 4
  739. bignum256 e = {0};
  740. bn_copy(prime, &e);
  741. bn_addi(&e, 1);
  742. bn_rshift(&e);
  743. bn_rshift(&e);
  744. bn_power_mod(x, &e, prime, x);
  745. bn_mod(x, prime);
  746. memzero(&e, sizeof(e));
  747. }
  748. // a = 1/a % 2**n
  749. // Assumes a is odd, 1 <= n <= 32
  750. // The function doesn't have neither constant control flow nor constant memory
  751. // access flow with regard to n
  752. uint32_t inverse_mod_power_two(uint32_t a, uint32_t n) {
  753. // Uses "Explicit Quadratic Modular inverse modulo 2" from section 3.3 of "On
  754. // Newton-Raphson iteration for multiplicative inverses modulo prime powers"
  755. // by Jean-Guillaume Dumas, see
  756. // https://arxiv.org/pdf/1209.6626.pdf
  757. // 1/a % 2**n
  758. // = (2-a) * product([1 + (a-1)**(2**i) for i in range(1, floor(log2(n)))])
  759. uint32_t acc = 2 - a;
  760. uint32_t f = a - 1;
  761. // mask = (1 << n) - 1
  762. uint32_t mask = n == 32 ? 0xFFFFFFFF : (1u << n) - 1;
  763. for (uint32_t i = 1; i < n; i <<= 1) {
  764. f = (f * f) & mask;
  765. acc = (acc * (1 + f)) & mask;
  766. }
  767. return acc;
  768. }
  769. // x = (x / 2**BITS_PER_LIMB) % prime
  770. // Assumes both x and prime are normalized
  771. // Assumes prime is an odd number and normalized
  772. // Guarantees x is normalized
  773. // If x is partly reduced (fully reduced) modulo prime,
  774. // guarantess x will be partly reduced (fully reduced) modulo prime
  775. void bn_divide_base(bignum256 *x, const bignum256 *prime) {
  776. // Uses an explicit formula for the modular inverse of power of two
  777. // (x / 2**n) % prime == (x + ((-x / prime) % 2**n) * prime) // 2**n
  778. // Proof:
  779. // (x + ((-x / prime) % 2**n) * prime) % 2**n
  780. // == (x - x / prime * prime) % 2**n
  781. // == 0
  782. // (x + ((-1 / prime) % 2**n) * prime) % prime
  783. // == x
  784. // if x < prime:
  785. // (x + ((-x / prime) % 2**n) * prime) // 2**n
  786. // <= ((prime - 1) + (2**n - 1) * prime) / 2**n
  787. // == (2**n * prime - 1) / 2**n == prime - 1 / 2**n < prime
  788. // if x < 2 * prime:
  789. // (x + ((-x / prime) % 2**n) * prime) // 2**n
  790. // <= ((2 * prime - 1) + (2**n - 1) * prime) / 2**n
  791. // == (2**n * prime + prime - 1) / 2**n
  792. // == prime + (prime - 1) / 2**n < 2 * prime
  793. // m = (-x / prime) % 2**BITS_PER_LIMB
  794. uint32_t m = (x->val[0] * (BN_BASE - inverse_mod_power_two(
  795. prime->val[0], BN_BITS_PER_LIMB))) &
  796. BN_LIMB_MASK;
  797. // m < 2**BITS_PER_LIMB
  798. uint64_t acc = x->val[0] + (uint64_t)m * prime->val[0];
  799. acc >>= BN_BITS_PER_LIMB;
  800. for (int i = 1; i < BN_LIMBS; i++) {
  801. acc = acc + x->val[i] + (uint64_t)m * prime->val[i];
  802. // acc does not overflow 64 bits
  803. // acc == acc + x + m * prime
  804. // <= 2**(64 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB)
  805. // 2**(BITS_PER_LIMB) * 2**(BITS_PER_LIMB)
  806. // <= 2**(2 * BITS_PER_LIMB) + 2**(64 - BITS_PER_LIMB) +
  807. // 2**(BITS_PER_LIMB)
  808. // <= 2**58 + 2**35 + 2**29 < 2**64
  809. x->val[i - 1] = acc & BN_LIMB_MASK;
  810. acc >>= BN_BITS_PER_LIMB;
  811. // acc < 2**35 == 2**(64 - BITS_PER_LIMB)
  812. // acc == x[:i + 1] + m * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
  813. }
  814. x->val[BN_LIMBS - 1] = acc;
  815. assert(acc >> BN_BITS_PER_LIMB == 0);
  816. // clang-format off
  817. // acc >> BITS_PER_LIMB == 0
  818. // Proof:
  819. // acc >> BITS_PER_LIMB
  820. // == (x[:LIMB] + m * prime[:LIMB] >> BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * (LIMBS + 1)
  821. // == x + m * prime >> BITS_PER_LIMB * (LIMBS + 1)
  822. // <= (2**(BITS_PER_LIMB * LIMBS) - 1) + (2**BITS_PER_LIMB - 1) * (2**(BITS_PER_LIMB * LIMBS) - 1) >> BITS_PER_LIMB * (LIMBS + 1)
  823. // == 2**(BITS_PER_LIMB * LIMBS) - 1 + 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**(BITS_PER_LIMB * LIMBS) - 2**BITS_PER_LIMB + 1 >> BITS_PER_LIMB * (LIMBS + 1)
  824. // == 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**BITS_PER_LIMB >> BITS_PER_LIMB * (LIMBS + 1)
  825. // == 0
  826. // clang-format on
  827. }
  828. #if !USE_INVERSE_FAST
  829. // x = 1/x % prime if x != 0 else 0
  830. // Assumes x is normalized
  831. // Assumes prime is a prime number
  832. // Guarantees x is normalized and fully reduced modulo prime
  833. // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
  834. // The function doesn't have neither constant control flow nor constant memory
  835. // access flow with regard to prime
  836. static void bn_inverse_slow(bignum256 *x, const bignum256 *prime) {
  837. // Uses formula 1/x % prime == x**(prime - 2) % prime
  838. // See https://en.wikipedia.org/wiki/Fermat%27s_little_theorem
  839. bn_fast_mod(x, prime);
  840. // e = prime - 2
  841. bignum256 e = {0};
  842. bn_read_uint32(2, &e);
  843. bn_subtract(prime, &e, &e);
  844. bn_power_mod(x, &e, prime, x);
  845. bn_mod(x, prime);
  846. memzero(&e, sizeof(e));
  847. }
  848. #endif
  849. #if false
  850. // x = 1/x % prime if x != 0 else 0
  851. // Assumes x is is_normalized
  852. // Assumes GCD(x, prime) = 1
  853. // Guarantees x is normalized and fully reduced modulo prime
  854. // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
  855. // The function doesn't have neither constant control flow nor constant memory
  856. // access flow with regard to prime and x
  857. static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
  858. // "The Almost Montgomery Inverse" from the section 3 of "Constant Time
  859. // Modular Inversion" by Joppe W. Bos
  860. // See http://www.joppebos.com/files/CTInversion.pdf
  861. /*
  862. u = prime
  863. v = x & prime
  864. s = 1
  865. r = 0
  866. k = 0
  867. while v != 1:
  868. k += 1
  869. if is_even(u):
  870. u = u // 2
  871. s = 2 * s
  872. elif is_even(v):
  873. v = v // 2
  874. r = 2 * r
  875. elif v < u:
  876. u = (u - v) // 2
  877. r = r + s
  878. s = 2 * s
  879. else:
  880. v = (v - u) // 2
  881. s = r + s
  882. r = 2 * r
  883. s = (s / 2**k) % prime
  884. return s
  885. */
  886. if (bn_is_zero(x)) return;
  887. bn_fast_mod(x, prime);
  888. bn_mod(x, prime);
  889. bignum256 u = {0}, v = {0}, r = {0}, s = {0};
  890. bn_copy(prime, &u);
  891. bn_copy(x, &v);
  892. bn_one(&s);
  893. bn_zero(&r);
  894. int k = 0;
  895. while (!bn_is_one(&v)) {
  896. if ((u.val[0] & 1) == 0) {
  897. bn_rshift(&u);
  898. bn_lshift(&s);
  899. } else if ((v.val[0] & 1) == 0) {
  900. bn_rshift(&v);
  901. bn_lshift(&r);
  902. } else if (bn_is_less(&v, &u)) {
  903. bn_subtract(&u, &v, &u);
  904. bn_rshift(&u);
  905. bn_add(&r, &s);
  906. bn_lshift(&s);
  907. } else {
  908. bn_subtract(&v, &u, &v);
  909. bn_rshift(&v);
  910. bn_add(&s, &r);
  911. bn_lshift(&r);
  912. }
  913. k += 1;
  914. assert(!bn_is_zero(&v)); // assert GCD(x, prime) == 1
  915. }
  916. // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
  917. for (int i = 0; i < k / BITS_PER_LIMB; i++) {
  918. bn_divide_base(&s, prime);
  919. }
  920. // s = s / 2**(k % BITS_PER_LIMB)
  921. for (int i = 0; i < k % BN_BITS_PER_LIMB; i++) {
  922. bn_mult_half(&s, prime);
  923. }
  924. bn_copy(&s, x);
  925. memzero(&u, sizeof(u));
  926. memzero(&v, sizeof(v));
  927. memzero(&r, sizeof(r));
  928. memzero(&s, sizeof(s));
  929. }
  930. #endif
  931. #if USE_INVERSE_FAST
  932. // x = 1/x % prime if x != 0 else 0
  933. // Assumes x is is_normalized
  934. // Assumes GCD(x, prime) = 1
  935. // Guarantees x is normalized and fully reduced modulo prime
  936. // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
  937. // The function has constant control flow but not constant memory access flow
  938. // with regard to prime and x
  939. static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
  940. // Custom constant time version of "The Almost Montgomery Inverse" from the
  941. // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
  942. // See http://www.joppebos.com/files/CTInversion.pdf
  943. /*
  944. u = prime
  945. v = x % prime
  946. s = 1
  947. r = 0
  948. k = 0
  949. while v != 1:
  950. k += 1
  951. if is_even(u): # b1
  952. u = u // 2
  953. s = 2 * s
  954. elif is_even(v): # b2
  955. v = v // 2
  956. r = 2 * r
  957. elif v < u: # b3
  958. u = (u - v) // 2
  959. r = r + s
  960. s = 2 * s
  961. else: # b4
  962. v = (v - u) // 2
  963. s = r + s
  964. r = 2 * r
  965. s = (s / 2**k) % prime
  966. return s
  967. */
  968. bn_fast_mod(x, prime);
  969. bn_mod(x, prime);
  970. bignum256 u = {0}, v = {0}, r = {0}, s = {0};
  971. bn_copy(prime, &u);
  972. bn_copy(x, &v);
  973. bn_one(&s);
  974. bn_zero(&r);
  975. bignum256 zero = {0};
  976. bn_zero(&zero);
  977. int k = 0;
  978. int finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0,
  979. b3 = 0, b4 = 0;
  980. finished = 0;
  981. for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
  982. finished = finished | -bn_is_one(&v);
  983. u_even = -bn_is_even(&u);
  984. v_even = -bn_is_even(&v);
  985. v_less_u = -bn_is_less(&v, &u);
  986. b1 = ~finished & u_even;
  987. b2 = ~finished & ~b1 & v_even;
  988. b3 = ~finished & ~b1 & ~b2 & v_less_u;
  989. b4 = ~finished & ~b1 & ~b2 & ~b3;
  990. // The ternary operator for pointers with constant control flow
  991. // BN_INVERSE_FAST_TERNARY(c, t, f) = t if c else f
  992. // Very nasty hack, sorry for that
  993. #define BN_INVERSE_FAST_TERNARY(c, t, f) \
  994. ((void *)(((c) & (uintptr_t)(t)) | (~(c) & (uintptr_t)(f))))
  995. bn_subtract(BN_INVERSE_FAST_TERNARY(b3, &u, &v),
  996. BN_INVERSE_FAST_TERNARY(
  997. b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &v, &u), &zero),
  998. BN_INVERSE_FAST_TERNARY(b3, &u, &v));
  999. bn_add(BN_INVERSE_FAST_TERNARY(b3, &r, &s),
  1000. BN_INVERSE_FAST_TERNARY(b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &s, &r),
  1001. &zero));
  1002. bn_rshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &u, &v));
  1003. bn_lshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &s, &r));
  1004. k = k - ~finished;
  1005. }
  1006. // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
  1007. for (int i = 0; i < 2 * BN_LIMBS; i++) {
  1008. // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
  1009. bn_copy(&s, &r);
  1010. bn_divide_base(&r, prime);
  1011. bn_cmov(&s, i < k / BN_BITS_PER_LIMB, &r, &s);
  1012. }
  1013. // s = s / 2**(k % BITS_PER_LIMB)
  1014. for (int i = 0; i < BN_BITS_PER_LIMB; i++) {
  1015. // s = s / 2 % prime if i < k % BITS_PER_LIMB else s
  1016. bn_copy(&s, &r);
  1017. bn_mult_half(&r, prime);
  1018. bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
  1019. }
  1020. bn_cmov(x, bn_is_zero(x), x, &s);
  1021. memzero(&u, sizeof(u));
  1022. memzero(&v, sizeof(v));
  1023. memzero(&r, sizeof(s));
  1024. memzero(&s, sizeof(s));
  1025. }
  1026. #endif
  1027. #if false
  1028. // x = 1/x % prime if x != 0 else 0
  1029. // Assumes x is is_normalized
  1030. // Assumes GCD(x, prime) = 1
  1031. // Guarantees x is normalized and fully reduced modulo prime
  1032. // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
  1033. static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
  1034. // Custom constant time version of "The Almost Montgomery Inverse" from the
  1035. // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
  1036. // See http://www.joppebos.com/files/CTInversion.pdf
  1037. /*
  1038. u = prime
  1039. v = x % prime
  1040. s = 1
  1041. r = 0
  1042. k = 0
  1043. while v != 1:
  1044. k += 1
  1045. if is_even(u): # b1
  1046. u = u // 2
  1047. s = 2 * s
  1048. elif is_even(v): # b2
  1049. v = v // 2
  1050. r = 2 * r
  1051. elif v < u: # b3
  1052. u = (u - v) // 2
  1053. r = r + s
  1054. s = 2 * s
  1055. else: # b4
  1056. v = (v - u) // 2
  1057. s = r + s
  1058. r = 2 * r
  1059. s = (s / 2**k) % prime
  1060. return s
  1061. */
  1062. bn_fast_mod(x, prime);
  1063. bn_mod(x, prime);
  1064. bignum256 u = {0}, v = {0}, r = {0}, s = {0};
  1065. bn_copy(prime, &u);
  1066. bn_copy(x, &v);
  1067. bn_one(&s);
  1068. bn_zero(&r);
  1069. bignum256 zero = {0};
  1070. bn_zero(&zero);
  1071. int k = 0;
  1072. uint32_t finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0,
  1073. b3 = 0, b4 = 0;
  1074. finished = 0;
  1075. bignum256 u_half = {0}, v_half = {0}, u_minus_v_half = {0}, v_minus_u_half = {0}, r_plus_s = {0}, r_twice = {0}, s_twice = {0};
  1076. for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
  1077. finished = finished | bn_is_one(&v);
  1078. u_even = bn_is_even(&u);
  1079. v_even = bn_is_even(&v);
  1080. v_less_u = bn_is_less(&v, &u);
  1081. b1 = (finished ^ 1) & u_even;
  1082. b2 = (finished ^ 1) & (b1 ^ 1) & v_even;
  1083. b3 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & v_less_u;
  1084. b4 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & (b3 ^ 1);
  1085. // u_half = u // 2
  1086. bn_copy(&u, &u_half);
  1087. bn_rshift(&u_half);
  1088. // v_half = v // 2
  1089. bn_copy(&v, &v_half);
  1090. bn_rshift(&v_half);
  1091. // u_minus_v_half = (u - v) // 2
  1092. bn_subtract(&u, &v, &u_minus_v_half);
  1093. bn_rshift(&u_minus_v_half);
  1094. // v_minus_u_half = (v - u) // 2
  1095. bn_subtract(&v, &u, &v_minus_u_half);
  1096. bn_rshift(&v_minus_u_half);
  1097. // r_plus_s = r + s
  1098. bn_copy(&r, &r_plus_s);
  1099. bn_add(&r_plus_s, &s);
  1100. // r_twice = 2 * r
  1101. bn_copy(&r, &r_twice);
  1102. bn_lshift(&r_twice);
  1103. // s_twice = 2 * s
  1104. bn_copy(&s, &s_twice);
  1105. bn_lshift(&s_twice);
  1106. bn_cmov(&u, b1, &u_half, &u);
  1107. bn_cmov(&u, b3, &u_minus_v_half, &u);
  1108. bn_cmov(&v, b2, &v_half, &v);
  1109. bn_cmov(&v, b4, &v_minus_u_half, &v);
  1110. bn_cmov(&r, b2 | b4, &r_twice, &r);
  1111. bn_cmov(&r, b3, &r_plus_s, &r);
  1112. bn_cmov(&s, b1 | b3, &s_twice, &s);
  1113. bn_cmov(&s, b4, &r_plus_s, &s);
  1114. k = k + (finished ^ 1);
  1115. }
  1116. // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
  1117. for (int i = 0; i < 2 * BN_LIMBS; i++) {
  1118. // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
  1119. bn_copy(&s, &r);
  1120. bn_divide_base(&r, prime);
  1121. bn_cmov(&s, i < k / BITS_PER_LIMB, &r, &s);
  1122. }
  1123. // s = s / 2**(k % BITS_PER_LIMB)
  1124. for (int i = 0; i < BN_BITS_PER_LIMB; i++) {
  1125. // s = s / 2 % prime if i < k % BITS_PER_LIMB else s
  1126. bn_copy(&s, &r);
  1127. bn_mult_half(&r, prime);
  1128. bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
  1129. }
  1130. bn_cmov(x, bn_is_zero(x), x, &s);
  1131. memzero(&u, sizeof(u));
  1132. memzero(&v, sizeof(v));
  1133. memzero(&r, sizeof(r));
  1134. memzero(&s, sizeof(s));
  1135. memzero(&u_half, sizeof(u_half));
  1136. memzero(&v_half, sizeof(v_half));
  1137. memzero(&u_minus_v_half, sizeof(u_minus_v_half));
  1138. memzero(&v_minus_u_half, sizeof(v_minus_u_half));
  1139. memzero(&r_twice, sizeof(r_twice));
  1140. memzero(&s_twice, sizeof(s_twice));
  1141. memzero(&r_plus_s, sizeof(r_plus_s));
  1142. }
  1143. #endif
  1144. // Normalizes x
  1145. // Assumes x < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
  1146. // Guarantees x is normalized
  1147. void bn_normalize(bignum256 *x) {
  1148. uint32_t acc = 0;
  1149. for (int i = 0; i < BN_LIMBS; i++) {
  1150. acc += x->val[i];
  1151. // acc doesn't overflow 32 bits
  1152. // Proof:
  1153. // acc + x[i]
  1154. // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
  1155. // == 7 + 2**29 - 1 < 2**32
  1156. x->val[i] = acc & BN_LIMB_MASK;
  1157. acc >>= (BN_BITS_PER_LIMB);
  1158. // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  1159. }
  1160. }
  1161. // x = x + y
  1162. // Assumes x, y are normalized, x + y < 2**(LIMBS*BITS_PER_LIMB) == 2**261
  1163. // Guarantees x is normalized
  1164. // Works properly even if &x == &y
  1165. void bn_add(bignum256 *x, const bignum256 *y) {
  1166. uint32_t acc = 0;
  1167. for (int i = 0; i < BN_LIMBS; i++) {
  1168. acc += x->val[i] + y->val[i];
  1169. // acc doesn't overflow 32 bits
  1170. // Proof:
  1171. // acc + x[i] + y[i]
  1172. // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
  1173. // == (2**(32 - BITS_PER_LIMB) - 1) + 2**(BITS_PER_LIMB + 1) - 2
  1174. // == 7 + 2**30 - 2 < 2**32
  1175. x->val[i] = acc & BN_LIMB_MASK;
  1176. acc >>= BN_BITS_PER_LIMB;
  1177. // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  1178. // acc == x[:i + 1] + y[:i + 1] >> BITS_PER_LIMB * (i + 1)
  1179. }
  1180. // assert(acc == 0); // assert x + y < 2**261
  1181. // acc == 0
  1182. // Proof:
  1183. // acc == x[:LIMBS] + y[:LIMBS] >> LIMBS * BITS_PER_LIMB
  1184. // == x + y >> LIMBS * BITS_PER_LIMB
  1185. // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> LIMBS * BITS_PER_LIMB == 0
  1186. }
  1187. // x = x + y % prime
  1188. // Assumes x, y are normalized
  1189. // Guarantees x is normalized and partly reduced modulo prime
  1190. // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
  1191. void bn_addmod(bignum256 *x, const bignum256 *y, const bignum256 *prime) {
  1192. for (int i = 0; i < BN_LIMBS; i++) {
  1193. x->val[i] += y->val[i];
  1194. // x[i] doesn't overflow 32 bits
  1195. // Proof:
  1196. // x[i] + y[i]
  1197. // <= 2 * (2**BITS_PER_LIMB - 1)
  1198. // == 2**30 - 2 < 2**32
  1199. }
  1200. bn_fast_mod(x, prime);
  1201. }
  1202. // x = x + y
  1203. // Assumes x is normalized
  1204. // Assumes y <= 2**32 - 2**29 == 2**32 - 2**BITS_PER_LIMB and
  1205. // x + y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
  1206. // Guarantees x is normalized
  1207. void bn_addi(bignum256 *x, uint32_t y) {
  1208. // assert(y <= 3758096384); // assert y <= 2**32 - 2**29
  1209. uint32_t acc = y;
  1210. for (int i = 0; i < BN_LIMBS; i++) {
  1211. acc += x->val[i];
  1212. // acc doesn't overflow 32 bits
  1213. // Proof:
  1214. // if i == 0:
  1215. // acc + x[i] == y + x[0]
  1216. // <= (2**32 - 2**BITS_PER_LIMB) + (2**BITS_PER_LIMB - 1)
  1217. // == 2**32 - 1 < 2**32
  1218. // else:
  1219. // acc + x[i]
  1220. // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
  1221. // == 7 + 2**29 - 1 < 2**32
  1222. x->val[i] = acc & BN_LIMB_MASK;
  1223. acc >>= (BN_BITS_PER_LIMB);
  1224. // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  1225. // acc == x[:i + 1] + y >> BITS_PER_LIMB * (i + 1)
  1226. }
  1227. // assert(acc == 0); // assert x + y < 2**261
  1228. // acc == 0
  1229. // Proof:
  1230. // acc == x[:LIMBS] + y << LIMBS * BITS_PER_LIMB
  1231. // == x + y << LIMBS * BITS_PER_LIMB
  1232. // <= 2**(LIMBS + BITS_PER_LIMB) - 1 << LIMBS * BITS_PER_LIMB
  1233. // == 0
  1234. }
  1235. // x = x - y % prime
  1236. // Explicitly x = x + prime - y
  1237. // Assumes x, y are normalized
  1238. // Assumes y < prime[0], x + prime - y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
  1239. // Guarantees x is normalized
  1240. // If x is fully reduced modulo prime,
  1241. // guarantess x will be partly reduced modulo prime
  1242. // Assumes prime is nonzero and normalized
  1243. void bn_subi(bignum256 *x, uint32_t y, const bignum256 *prime) {
  1244. assert(y < prime->val[0]);
  1245. // x = x + prime - y
  1246. uint32_t acc = -y;
  1247. for (int i = 0; i < BN_LIMBS; i++) {
  1248. acc += x->val[i] + prime->val[i];
  1249. // acc neither overflows 32 bits nor underflows 0
  1250. // Proof:
  1251. // acc + x[i] + prime[i]
  1252. // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
  1253. // <= 7 + 2**30 - 2 < 2**32
  1254. // acc + x[i] + prime[i]
  1255. // >= -y + prime[0] >= 0
  1256. x->val[i] = acc & BN_LIMB_MASK;
  1257. acc >>= BN_BITS_PER_LIMB;
  1258. // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  1259. // acc == x[:i + 1] + prime[:i + 1] - y >> BITS_PER_LIMB * (i + 1)
  1260. }
  1261. // assert(acc == 0); // assert x + prime - y < 2**261
  1262. // acc == 0
  1263. // Proof:
  1264. // acc == x[:LIMBS] + prime[:LIMBS] - y >> BITS_PER_LIMB * LIMBS
  1265. // == x + prime - y >> BITS_PER_LIMB * LIMBS
  1266. // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> BITS_PER_LIMB * LIMBS == 0
  1267. }
  1268. // res = x - y % prime
  1269. // Explicitly res = x + (2 * prime - y)
  1270. // Assumes x, y are normalized, y is partly reduced
  1271. // Assumes x + 2 * prime - y < 2**261 == 2**(BITS_PER_LIMB * LIMBS)
  1272. // Guarantees res is normalized
  1273. // Assumes prime is nonzero and normalized
  1274. void bn_subtractmod(const bignum256 *x, const bignum256 *y, bignum256 *res,
  1275. const bignum256 *prime) {
  1276. // res = x + (2 * prime - y)
  1277. uint32_t acc = 1;
  1278. for (int i = 0; i < BN_LIMBS; i++) {
  1279. acc += (BN_BASE - 1) + x->val[i] + 2 * prime->val[i] - y->val[i];
  1280. // acc neither overflows 32 bits nor underflows 0
  1281. // Proof:
  1282. // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
  1283. // >= (BASE - 1) - y[i]
  1284. // == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) == 0
  1285. // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
  1286. // <= acc + (BASE - 1) + x[i] + 2 * prime[i]
  1287. // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
  1288. // (2**BITS_PER_LIMB - 1) + 2 * (2**BITS_PER_LIMB - 1)
  1289. // <= (2**(32 - BITS_PER_LIMB) - 1) + 4 * (2**BITS_PER_LIMB - 1)
  1290. // == 7 + 4 * 2**29 - 4 == 2**31 + 3 < 2**32
  1291. res->val[i] = acc & (BN_BASE - 1);
  1292. acc >>= BN_BITS_PER_LIMB;
  1293. // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  1294. // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i+1] - y[:i+1] + 2*prime[:i+1]
  1295. // >> BITS_PER_LIMB * (i+1)
  1296. }
  1297. // assert(acc == 1); // assert x + 2 * prime - y < 2**261
  1298. // clang-format off
  1299. // acc == 1
  1300. // Proof:
  1301. // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
  1302. // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
  1303. // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
  1304. // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
  1305. // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
  1306. // == 1
  1307. // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
  1308. // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
  1309. // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
  1310. // >= 2**(BITS_PER_LIMB * LIMBS) + 0 + 1 >> BITS_PER_LIMB * LIMBS
  1311. // == 1
  1312. // clang-format on
  1313. }
  1314. // res = x - y
  1315. // Assumes x, y are normalized and x >= y
  1316. // Guarantees res is normalized
  1317. // Works properly even if &x == &y or &x == &res or &y == &res or
  1318. // &x == &y == &res
  1319. void bn_subtract(const bignum256 *x, const bignum256 *y, bignum256 *res) {
  1320. uint32_t acc = 1;
  1321. for (int i = 0; i < BN_LIMBS; i++) {
  1322. acc += (BN_BASE - 1) + x->val[i] - y->val[i];
  1323. // acc neither overflows 32 bits nor underflows 0
  1324. // Proof:
  1325. // acc + (BASE - 1) + x[i] - y[i]
  1326. // >= (BASE - 1) - y == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
  1327. // == 0
  1328. // acc + (BASE - 1) + x[i] - y[i]
  1329. // <= acc + (BASE - 1) + x[i]
  1330. // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
  1331. // (2**BITS_PER_LIMB - 1)
  1332. // == 7 + 2 * 2**29 < 2 **32
  1333. res->val[i] = acc & BN_LIMB_MASK;
  1334. acc >>= BN_BITS_PER_LIMB;
  1335. // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
  1336. // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i + 1] - y[:i + 1]
  1337. // >> BITS_PER_LIMB * (i + 1)
  1338. }
  1339. // assert(acc == 1); // assert x >= y
  1340. // clang-format off
  1341. // acc == 1
  1342. // Proof:
  1343. // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
  1344. // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
  1345. // == 2**(BITS_PER_LIMB * LIMBS) + x >> BITS_PER_LIMB * LIMBS
  1346. // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
  1347. // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
  1348. // == 1
  1349. // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
  1350. // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
  1351. // >= 2**(BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * LIMBS
  1352. // == 1
  1353. }
  1354. // q = x // d, r = x % d
  1355. // Assumes x is normalized, 1 <= d <= 61304
  1356. // Guarantees q is normalized
  1357. void bn_long_division(bignum256 *x, uint32_t d, bignum256 *q, uint32_t *r) {
  1358. assert(1 <= d && d < 61304);
  1359. uint32_t acc = 0;
  1360. *r = x->val[BN_LIMBS - 1] % d;
  1361. q->val[BN_LIMBS - 1] = x->val[BN_LIMBS - 1] / d;
  1362. for (int i = BN_LIMBS - 2; i >= 0; i--) {
  1363. acc = *r * (BN_BASE % d) + x->val[i];
  1364. // acc doesn't overflow 32 bits
  1365. // Proof:
  1366. // r * (BASE % d) + x[i]
  1367. // <= (d - 1) * (d - 1) + (2**BITS_PER_LIMB - 1)
  1368. // == d**2 - 2*d + 2**BITS_PER_LIMB
  1369. // == 61304**2 - 2 * 61304 + 2**29
  1370. // == 3758057808 + 2**29 < 2**32
  1371. q->val[i] = *r * (BN_BASE / d) + (acc / d);
  1372. // q[i] doesn't overflow 32 bits
  1373. // Proof:
  1374. // r * (BASE // d) + (acc // d)
  1375. // <= (d - 1) * (2**BITS_PER_LIMB / d) +
  1376. // ((d**2 - 2*d + 2**BITS_PER_LIMB) / d)
  1377. // <= (d - 1) * (2**BITS_PER_LIMB / d) + (d - 2 + 2**BITS_PER_LIMB / d)
  1378. // == (d - 1 + 1) * (2**BITS_PER_LIMB / d) + d - 2
  1379. // == 2**BITS_PER_LIMB + d - 2 <= 2**29 + 61304 < 2**32
  1380. // q[i] == (r * BASE + x[i]) // d
  1381. // Proof:
  1382. // q[i] == r * (BASE // d) + (acc // d)
  1383. // == r * (BASE // d) + (r * (BASE % d) + x[i]) // d
  1384. // == (r * d * (BASE // d) + r * (BASE % d) + x[i]) // d
  1385. // == (r * (d * (BASE // d) + (BASE % d)) + x[i]) // d
  1386. // == (r * BASE + x[i]) // d
  1387. // q[i] < 2**BITS_PER_LIMB
  1388. // Proof:
  1389. // q[i] == (r * BASE + x[i]) // d
  1390. // <= ((d - 1) * 2**BITS_PER_LIMB + (2**BITS_PER_LIMB - 1)) / d
  1391. // == (d * 2**BITS_PER_LIMB - 1) / d == 2**BITS_PER_LIMB - 1 / d
  1392. // < 2**BITS_PER_LIMB
  1393. *r = acc % d;
  1394. // r == (r * BASE + x[i]) % d
  1395. // Proof:
  1396. // r == acc % d == (r * (BASE % d) + x[i]) % d
  1397. // == (r * BASE + x[i]) % d
  1398. // x[:i] == q[:i] * d + r
  1399. }
  1400. }
  1401. // x = x // 58, r = x % 58
  1402. // Assumes x is normalized
  1403. // Guarantees x is normalized
  1404. void bn_divmod58(bignum256 *x, uint32_t *r) { bn_long_division(x, 58, x, r); }
  1405. // x = x // 1000, r = x % 1000
  1406. // Assumes x is normalized
  1407. // Guarantees x is normalized
  1408. void bn_divmod1000(bignum256 *x, uint32_t *r) {
  1409. bn_long_division(x, 1000, x, r);
  1410. }
  1411. // x = x // 10, r = x % 10
  1412. // Assumes x is normalized
  1413. // Guarantees x is normalized
  1414. void bn_divmod10(bignum256 *x, uint32_t *r) { bn_long_division(x, 10, x, r); }
  1415. // Formats amount
  1416. // Assumes amount is normalized
  1417. // Assumes prefix and suffix are null-terminated strings
  1418. // Assumes output is an array of length output_length
  1419. // The function doesn't have neither constant control flow nor constant memory
  1420. // access flow with regard to any its argument
  1421. size_t bn_format(const bignum256 *amount, const char *prefix, const char *suffix, unsigned int decimals, int exponent, bool trailing, char thousands, char *output, size_t output_length) {
  1422. /*
  1423. Python prototype of the function:
  1424. def format(amount, prefix, suffix, decimals, exponent, trailing, thousands):
  1425. if exponent >= 0:
  1426. amount *= 10**exponent
  1427. else:
  1428. amount //= 10 ** (-exponent)
  1429. d = pow(10, decimals)
  1430. integer_part = amount // d
  1431. integer_str = f"{integer_part:,}".replace(",", thousands or "")
  1432. if decimals:
  1433. decimal_part = amount % d
  1434. decimal_str = f".{decimal_part:0{decimals}d}"
  1435. if not trailing:
  1436. decimal_str = decimal_str.rstrip("0").rstrip(".")
  1437. else:
  1438. decimal_str = ""
  1439. return prefix + integer_str + decimal_str + suffix
  1440. */
  1441. // Auxiliary macro for bn_format
  1442. // If enough space adds one character to output starting from the end
  1443. #define BN_FORMAT_ADD_OUTPUT_CHAR(c) \
  1444. { \
  1445. --position; \
  1446. if (output <= position && position < output + output_length) { \
  1447. *position = (c); \
  1448. } else { \
  1449. memset(output, '\0', output_length); \
  1450. return 0; \
  1451. } \
  1452. }
  1453. bignum256 temp = {0};
  1454. bn_copy(amount, &temp);
  1455. uint32_t digit = 0;
  1456. char *position = output + output_length;
  1457. // Add string ending character
  1458. BN_FORMAT_ADD_OUTPUT_CHAR('\0');
  1459. // Add suffix
  1460. size_t suffix_length = suffix ? strlen(suffix) : 0;
  1461. for (int i = suffix_length - 1; i >= 0; --i)
  1462. BN_FORMAT_ADD_OUTPUT_CHAR(suffix[i])
  1463. // amount //= 10**exponent
  1464. for (; exponent < 0; ++exponent) {
  1465. // if temp == 0, there is no need to divide it by 10 anymore
  1466. if (bn_is_zero(&temp)) {
  1467. exponent = 0;
  1468. break;
  1469. }
  1470. bn_divmod10(&temp, &digit);
  1471. }
  1472. // exponent >= 0 && decimals >= 0
  1473. bool fractional_part = false; // is fractional-part of amount present
  1474. { // Add fractional-part digits of amount
  1475. // Add trailing zeroes
  1476. unsigned int trailing_zeros = decimals < (unsigned int) exponent ? decimals : (unsigned int) exponent;
  1477. // When casting a negative int to unsigned int, UINT_MAX is added to the int before
  1478. // Since exponent >= 0, the value remains unchanged
  1479. decimals -= trailing_zeros;
  1480. exponent -= trailing_zeros;
  1481. if (trailing && trailing_zeros) {
  1482. fractional_part = true;
  1483. for (; trailing_zeros > 0; --trailing_zeros)
  1484. BN_FORMAT_ADD_OUTPUT_CHAR('0')
  1485. }
  1486. // exponent == 0 || decimals == 0
  1487. // Add significant digits and leading zeroes
  1488. for (; decimals > 0; --decimals) {
  1489. bn_divmod10(&temp, &digit);
  1490. if (fractional_part || digit || trailing) {
  1491. fractional_part = true;
  1492. BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
  1493. }
  1494. else if (bn_is_zero(&temp)) {
  1495. // We break since the remaining digits are zeroes and fractional_part == trailing == false
  1496. decimals = 0;
  1497. break;
  1498. }
  1499. }
  1500. // decimals == 0
  1501. }
  1502. if (fractional_part) {
  1503. BN_FORMAT_ADD_OUTPUT_CHAR('.')
  1504. }
  1505. { // Add integer-part digits of amount
  1506. // Add trailing zeroes
  1507. int digits = 0;
  1508. if (!bn_is_zero(&temp)) {
  1509. for (; exponent > 0; --exponent) {
  1510. ++digits;
  1511. BN_FORMAT_ADD_OUTPUT_CHAR('0')
  1512. if (thousands != 0 && digits % 3 == 0) {
  1513. BN_FORMAT_ADD_OUTPUT_CHAR(thousands)
  1514. }
  1515. }
  1516. }
  1517. // decimals == 0 && exponent == 0
  1518. // Add significant digits
  1519. bool is_zero = false;
  1520. do {
  1521. ++digits;
  1522. bn_divmod10(&temp, &digit);
  1523. is_zero = bn_is_zero(&temp);
  1524. BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
  1525. if (thousands != 0 && !is_zero && digits % 3 == 0) {
  1526. BN_FORMAT_ADD_OUTPUT_CHAR(thousands)
  1527. }
  1528. } while (!is_zero);
  1529. }
  1530. // Add prefix
  1531. size_t prefix_length = prefix ? strlen(prefix) : 0;
  1532. for (int i = prefix_length - 1; i >= 0; --i)
  1533. BN_FORMAT_ADD_OUTPUT_CHAR(prefix[i])
  1534. // Move formatted amount to the start of output
  1535. int length = output - position + output_length;
  1536. memmove(output, position, length);
  1537. return length - 1;
  1538. }
  1539. #if USE_BN_PRINT
  1540. // Prints x in hexadecimal
  1541. // Assumes x is normalized and x < 2**256
  1542. void bn_print(const bignum256 *x) {
  1543. printf("%06x", x->val[8]);
  1544. printf("%08x", ((x->val[7] << 3) | (x->val[6] >> 26)));
  1545. printf("%07x", ((x->val[6] << 2) | (x->val[5] >> 27)) & 0x0FFFFFFF);
  1546. printf("%07x", ((x->val[5] << 1) | (x->val[4] >> 28)) & 0x0FFFFFFF);
  1547. printf("%07x", x->val[4] & 0x0FFFFFFF);
  1548. printf("%08x", ((x->val[3] << 3) | (x->val[2] >> 26)));
  1549. printf("%07x", ((x->val[2] << 2) | (x->val[1] >> 27)) & 0x0FFFFFFF);
  1550. printf("%07x", ((x->val[1] << 1) | (x->val[0] >> 28)) & 0x0FFFFFFF);
  1551. printf("%07x", x->val[0] & 0x0FFFFFFF);
  1552. }
  1553. // Prints comma separated list of limbs of x
  1554. void bn_print_raw(const bignum256 *x) {
  1555. for (int i = 0; i < BN_LIMBS - 1; i++) {
  1556. printf("0x%08x, ", x->val[i]);
  1557. }
  1558. printf("0x%08x", x->val[BN_LIMBS - 1]);
  1559. }
  1560. #endif
  1561. #if USE_INVERSE_FAST
  1562. void bn_inverse(bignum256 *x, const bignum256 *prime) {
  1563. bn_inverse_fast(x, prime);
  1564. }
  1565. #else
  1566. void bn_inverse(bignum256 *x, const bignum256 *prime) {
  1567. bn_inverse_slow(x, prime);
  1568. }
  1569. #endif