| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848 |
- /**
- * Copyright (c) 2013-2014 Tomas Dzetkulic
- * Copyright (c) 2013-2014 Pavol Rusnak
- * Copyright (c) 2015 Jochen Hoenicke
- * Copyright (c) 2016 Alex Beregszaszi
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
- * OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
- #include "bignum.h"
- #include <assert.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <string.h>
- #include "memzero.h"
- #include "script.h"
- /*
- This library implements 256-bit numbers arithmetic.
- An unsigned 256-bit number is represented by a bignum256 structure, that is an
- array of nine 32-bit values called limbs. Limbs are digits of the number in
- the base 2**29 representation in the little endian order. This means that
- bignum256 x;
- represents the value
- sum([x[i] * 2**(29*i) for i in range(9)).
- A limb of a bignum256 is *normalized* iff it's less than 2**29.
- A bignum256 is *normalized* iff every its limb is normalized.
- A number is *fully reduced modulo p* iff it is less than p.
- A number is *partly reduced modulo p* iff is is less than 2*p.
- The number p is usually a prime number such that 2^256 - 2^224 <= p <= 2^256.
- All functions except bn_fast_mod expect that all their bignum256 inputs are
- normalized. (The function bn_fast_mod allows the input number to have the
- most significant limb unnormalized). All bignum256 outputs of all functions
- are guaranteed to be normalized.
- A number can be partly reduced with bn_fast_mod, a partly reduced number can
- be fully reduced with bn_mod.
- A function has *constant control flow with regard to its argument* iff the
- order in which instructions of the function are executed doesn't depend on the
- value of the argument.
- A function has *constant memory access flow with regard to its argument* iff
- the memory addresses that are acessed and the order in which they are accessed
- don't depend on the value of the argument.
- A function *has contant control (memory access) flow* iff it has constant
- control (memory access) flow with regard to all its arguments.
- The following function has contant control flow with regard to its arugment
- n, however is doesn't have constant memory access flow with regard to it:
- void (int n, int *a) }
- a[0] = 0;
- a[n] = 0; // memory address reveals the value of n
- }
- Unless stated otherwise all functions are supposed to have both constant
- control flow and constant memory access flow.
- */
- #define BN_MAX_DECIMAL_DIGITS \
- 79 // floor(log(2**(LIMBS * BITS_PER_LIMB), 10)) + 1
- // out_number = (bignum256) in_number
- // Assumes in_number is a raw bigendian 256-bit number
- // Guarantees out_number is normalized
- void bn_read_be(const uint8_t *in_number, bignum256 *out_number) {
- uint32_t temp = 0;
- for (int i = 0; i < BN_LIMBS - 1; i++) {
- uint32_t limb = read_be(in_number + (BN_LIMBS - 2 - i) * 4);
- temp |= limb << (BN_EXTRA_BITS * i);
- out_number->val[i] = temp & BN_LIMB_MASK;
- temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
- }
- out_number->val[BN_LIMBS - 1] = temp;
- }
- // out_number = (256BE) in_number
- // Assumes in_number < 2**256
- // Guarantess out_number is a raw bigendian 256-bit number
- void bn_write_be(const bignum256 *in_number, uint8_t *out_number) {
- uint32_t temp = in_number->val[BN_LIMBS - 1];
- for (int i = BN_LIMBS - 2; i >= 0; i--) {
- uint32_t limb = in_number->val[i];
- temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) |
- (limb >> (BN_EXTRA_BITS * i));
- write_be(out_number + (BN_LIMBS - 2 - i) * 4, temp);
- temp = limb;
- }
- }
- // out_number = (bignum256) in_number
- // Assumes in_number is a raw little endian 256-bit number
- // Guarantees out_number is normalized
- void bn_read_le(const uint8_t *in_number, bignum256 *out_number) {
- uint32_t temp = 0;
- for (int i = 0; i < BN_LIMBS - 1; i++) {
- uint32_t limb = read_le(in_number + i * 4);
- temp |= limb << (BN_EXTRA_BITS * i);
- out_number->val[i] = temp & BN_LIMB_MASK;
- temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
- }
- out_number->val[BN_LIMBS - 1] = temp;
- }
- // out_number = (256LE) in_number
- // Assumes in_number < 2**256
- // Guarantess out_number is a raw little endian 256-bit number
- void bn_write_le(const bignum256 *in_number, uint8_t *out_number) {
- uint32_t temp = in_number->val[BN_LIMBS - 1];
- for (int i = BN_LIMBS - 2; i >= 0; i--) {
- uint32_t limb = in_number->val[i];
- temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) |
- (limb >> (BN_EXTRA_BITS * i));
- write_le(out_number + i * 4, temp);
- temp = limb;
- }
- }
- // out_number = (bignum256) in_number
- // Guarantees out_number is normalized
- void bn_read_uint32(uint32_t in_number, bignum256 *out_number) {
- out_number->val[0] = in_number & BN_LIMB_MASK;
- out_number->val[1] = in_number >> BN_BITS_PER_LIMB;
- for (uint32_t i = 2; i < BN_LIMBS; i++) out_number->val[i] = 0;
- }
- // out_number = (bignum256) in_number
- // Guarantees out_number is normalized
- void bn_read_uint64(uint64_t in_number, bignum256 *out_number) {
- out_number->val[0] = in_number & BN_LIMB_MASK;
- out_number->val[1] = (in_number >>= BN_BITS_PER_LIMB) & BN_LIMB_MASK;
- out_number->val[2] = in_number >> BN_BITS_PER_LIMB;
- for (uint32_t i = 3; i < BN_LIMBS; i++) out_number->val[i] = 0;
- }
- // Returns the bitsize of x
- // Assumes x is normalized
- // The function doesn't have neither constant control flow nor constant memory
- // access flow
- int bn_bitcount(const bignum256 *x) {
- for (int i = BN_LIMBS - 1; i >= 0; i--) {
- uint32_t limb = x->val[i];
- if (limb != 0) {
- // __builtin_clz returns the number of leading zero bits starting at the
- // most significant bit position
- return i * BN_BITS_PER_LIMB + (32 - __builtin_clz(limb));
- }
- }
- return 0;
- }
- // Returns the number of decimal digits of x; if x is 0, returns 1
- // Assumes x is normalized
- // The function doesn't have neither constant control flow nor constant memory
- // access flow
- unsigned int bn_digitcount(const bignum256 *x) {
- bignum256 val = {0};
- bn_copy(x, &val);
- unsigned int digits = 1;
- for (unsigned int i = 0; i < BN_MAX_DECIMAL_DIGITS; i += 3) {
- uint32_t limb = 0;
- bn_divmod1000(&val, &limb);
- if (limb >= 100) {
- digits = i + 3;
- } else if (limb >= 10) {
- digits = i + 2;
- } else if (limb >= 1) {
- digits = i + 1;
- }
- }
- memzero(&val, sizeof(val));
- return digits;
- }
- // x = 0
- // Guarantees x is normalized
- void bn_zero(bignum256 *x) {
- for (int i = 0; i < BN_LIMBS; i++) {
- x->val[i] = 0;
- }
- }
- // x = 1
- // Guarantees x is normalized
- void bn_one(bignum256 *x) {
- x->val[0] = 1;
- for (int i = 1; i < BN_LIMBS; i++) {
- x->val[i] = 0;
- }
- }
- // Returns x == 0
- // Assumes x is normalized
- int bn_is_zero(const bignum256 *x) {
- uint32_t result = 0;
- for (int i = 0; i < BN_LIMBS; i++) {
- result |= x->val[i];
- }
- return !result;
- }
- // Returns x == 1
- // Assumes x is normalized
- int bn_is_one(const bignum256 *x) {
- uint32_t result = x->val[0] ^ 1;
- for (int i = 1; i < BN_LIMBS; i++) {
- result |= x->val[i];
- }
- return !result;
- }
- // Returns x < y
- // Assumes x, y are normalized
- int bn_is_less(const bignum256 *x, const bignum256 *y) {
- uint32_t res1 = 0;
- uint32_t res2 = 0;
- for (int i = BN_LIMBS - 1; i >= 0; i--) {
- res1 = (res1 << 1) | (x->val[i] < y->val[i]);
- res2 = (res2 << 1) | (x->val[i] > y->val[i]);
- }
- return res1 > res2;
- }
- // Returns x == y
- // Assumes x, y are normalized
- int bn_is_equal(const bignum256 *x, const bignum256 *y) {
- uint32_t result = 0;
- for (int i = 0; i < BN_LIMBS; i++) {
- result |= x->val[i] ^ y->val[i];
- }
- return !result;
- }
- // res = cond if truecase else falsecase
- // Assumes cond is either 0 or 1
- // Works properly even if &res == &truecase or &res == &falsecase or
- // &truecase == &falsecase or &res == &truecase == &falsecase
- void bn_cmov(bignum256 *res, volatile uint32_t cond, const bignum256 *truecase,
- const bignum256 *falsecase) {
- // Intentional use of bitwise OR operator to ensure constant-time
- assert((int)(cond == 1) | (int)(cond == 0));
- uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
- uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
- for (int i = 0; i < BN_LIMBS; i++) {
- res->val[i] = (truecase->val[i] & tmask) | (falsecase->val[i] & fmask);
- }
- }
- // x = -x % prime if cond else x,
- // Explicitly x = (3 * prime - x if x > prime else 2 * prime - x) if cond else
- // else (x if x > prime else x + prime)
- // Assumes x is normalized and partly reduced
- // Assumes cond is either 1 or 0
- // Guarantees x is normalized
- // Assumes prime is normalized and
- // 0 < prime < 2**260 == 2**(BITS_PER_LIMB * LIMBS - 1)
- void bn_cnegate(volatile uint32_t cond, bignum256 *x, const bignum256 *prime) {
- // Intentional use of bitwise OR operator to ensure constant time
- assert((int)(cond == 1) | (int)(cond == 0));
- uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
- uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
- bn_mod(x, prime);
- // x < prime
- uint32_t acc1 = 1;
- uint32_t acc2 = 0;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc1 += (BN_BASE - 1) + 2 * prime->val[i] - x->val[i];
- // acc1 neither overflows 32 bits nor underflows 0
- // Proof:
- // acc1 + (BASE - 1) + 2 * prime[i] - x[i]
- // >= (BASE - 1) - x >= (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
- // == 0
- // acc1 + (BASE - 1) + 2 * prime[i] - x[i]
- // <= acc1 + (BASE - 1) + 2 * prime[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1) +
- // (2**BITS_PER_LIMB - 1)
- // == 7 + 3 * 2**29 < 2**32
- acc2 += prime->val[i] + x->val[i];
- // acc2 doesn't overflow 32 bits
- // Proof:
- // acc2 + prime[i] + x[i]
- // <= 2**(32 - BITS_PER_LIMB) - 1 + 2 * (2**BITS_PER_LIMB - 1)
- // == 2**(32 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB + 1) - 2
- // == 2**30 + 5 < 2**32
- // x = acc1 & LIMB_MASK if cond else acc2 & LIMB_MASK
- x->val[i] = ((acc1 & tmask) | (acc2 & fmask)) & BN_LIMB_MASK;
- acc1 >>= BN_BITS_PER_LIMB;
- // acc1 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc1 == 2**(BITS_PER_LIMB * (i + 1)) + 2 * prime[:i + 1] - x[:i + 1]
- // >> BITS_PER_LIMB * (i + 1)
- acc2 >>= BN_BITS_PER_LIMB;
- // acc2 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc2 == prime[:i + 1] + x[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc1 == 1); // assert prime <= 2**260
- // assert(acc2 == 0);
- // clang-format off
- // acc1 == 1
- // Proof:
- // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * prime >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 2 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
- // >= 2**(BITS_PER_LIMB * LIMBS) + 0 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc2 == 0
- // Proof:
- // acc2 == prime[:LIMBS] + x[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == prime + x >> BITS_PER_LIMB * LIMBS
- // <= 2 * prime - 1 >> BITS_PER_LIMB * LIMBS
- // <= 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) - 1 >> 261
- // == 2**(BITS_PER_LIMB * LIMBS) - 3 >> BITS_PER_LIMB * LIMBS
- // == 0
- // clang-format on
- }
- // x <<= 1
- // Assumes x is normalized, x < 2**260 == 2**(LIMBS*BITS_PER_LIMB - 1)
- // Guarantees x is normalized
- void bn_lshift(bignum256 *x) {
- for (int i = BN_LIMBS - 1; i > 0; i--) {
- x->val[i] = ((x->val[i] << 1) & BN_LIMB_MASK) |
- (x->val[i - 1] >> (BN_BITS_PER_LIMB - 1));
- }
- x->val[0] = (x->val[0] << 1) & BN_LIMB_MASK;
- }
- // x >>= 1, i.e. x = floor(x/2)
- // Assumes x is normalized
- // Guarantees x is normalized
- // If x is partly reduced (fully reduced) modulo prime,
- // guarantess x will be partly reduced (fully reduced) modulo prime
- void bn_rshift(bignum256 *x) {
- for (int i = 0; i < BN_LIMBS - 1; i++) {
- x->val[i] =
- (x->val[i] >> 1) | ((x->val[i + 1] & 1) << (BN_BITS_PER_LIMB - 1));
- }
- x->val[BN_LIMBS - 1] >>= 1;
- }
- // Sets i-th least significant bit (counting from zero)
- // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
- // Guarantees x is normalized
- // The function has constant control flow but not constant memory access flow
- // with regard to i
- void bn_setbit(bignum256 *x, uint16_t i) {
- assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
- x->val[i / BN_BITS_PER_LIMB] |= (1u << (i % BN_BITS_PER_LIMB));
- }
- // clears i-th least significant bit (counting from zero)
- // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
- // Guarantees x is normalized
- // The function has constant control flow but not constant memory access flow
- // with regard to i
- void bn_clearbit(bignum256 *x, uint16_t i) {
- assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
- x->val[i / BN_BITS_PER_LIMB] &= ~(1u << (i % BN_BITS_PER_LIMB));
- }
- // returns i-th least significant bit (counting from zero)
- // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
- // The function has constant control flow but not constant memory access flow
- // with regard to i
- uint32_t bn_testbit(const bignum256 *x, uint16_t i) {
- assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
- return (x->val[i / BN_BITS_PER_LIMB] >> (i % BN_BITS_PER_LIMB)) & 1;
- }
- // res = x ^ y
- // Assumes x, y are normalized
- // Guarantees res is normalized
- // Works properly even if &res == &x or &res == &y or &res == &x == &y
- void bn_xor(bignum256 *res, const bignum256 *x, const bignum256 *y) {
- for (int i = 0; i < BN_LIMBS; i++) {
- res->val[i] = x->val[i] ^ y->val[i];
- }
- }
- // x = x / 2 % prime
- // Explicitly x = x / 2 if is_even(x) else (x + prime) / 2
- // Assumes x is normalized, x + prime < 261 == LIMBS * BITS_PER_LIMB
- // Guarantees x is normalized
- // If x is partly reduced (fully reduced) modulo prime,
- // guarantess x will be partly reduced (fully reduced) modulo prime
- // Assumes prime is an odd number and normalized
- void bn_mult_half(bignum256 *x, const bignum256 *prime) {
- // x = x / 2 if is_even(x) else (x + prime) / 2
- uint32_t x_is_odd_mask =
- -(x->val[0] & 1); // x_is_odd_mask = 0xFFFFFFFF if is_odd(x) else 0
- uint32_t acc = (x->val[0] + (prime->val[0] & x_is_odd_mask)) >> 1;
- // acc < 2**BITS_PER_LIMB
- // Proof:
- // acc == x[0] + prime[0] & x_is_odd_mask >> 1
- // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1) >> 1
- // == 2**(BITS_PER_LIMB + 1) - 2 >> 1
- // < 2**(BITS_PER_LIMB)
- for (int i = 0; i < BN_LIMBS - 1; i++) {
- uint32_t temp = (x->val[i + 1] + (prime->val[i + 1] & x_is_odd_mask));
- // temp < 2**(BITS_PER_LIMB + 1)
- // Proof:
- // temp == x[i + 1] + val[i + 1] & x_is_odd_mask
- // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1)
- // < 2**(BITS_PER_LIMB + 1)
- acc += (temp & 1) << (BN_BITS_PER_LIMB - 1);
- // acc doesn't overflow 32 bits
- // Proof:
- // acc + (temp & 1 << BITS_PER_LIMB - 1)
- // <= 2**(BITS_PER_LIMB + 1) + 2**(BITS_PER_LIMB - 1)
- // <= 2**30 + 2**28 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- acc += temp >> 1;
- // acc < 2**(BITS_PER_LIMB + 1)
- // Proof:
- // acc + (temp >> 1)
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB + 1) - 1 >> 1)
- // == 7 + 2**(BITS_PER_LIMB) - 1 < 2**(BITS_PER_LIMB + 1)
- // acc == x[:i+2]+(prime[:i+2] & x_is_odd_mask) >> BITS_PER_LIMB * (i+1)
- }
- x->val[BN_LIMBS - 1] = acc;
- // assert(acc >> BITS_PER_LIMB == 0);
- // acc >> BITS_PER_LIMB == 0
- // Proof:
- // acc
- // == x[:LIMBS] + (prime[:LIMBS] & x_is_odd_mask) >> BITS_PER_LIMB*LIMBS
- // == x + (prime & x_is_odd_mask) >> BITS_PER_LIMB * LIMBS
- // <= x + prime >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // == 0
- }
- // x = x * k % prime
- // Assumes x is normalized, 0 <= k <= 8 = 2**(32 - BITS_PER_LIMB)
- // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
- // Guarantees x is normalized and partly reduced modulo prime
- void bn_mult_k(bignum256 *x, uint8_t k, const bignum256 *prime) {
- assert(k <= 8);
- for (int i = 0; i < BN_LIMBS; i++) {
- x->val[i] = k * x->val[i];
- // x[i] doesn't overflow 32 bits
- // k * x[i] <= 2**(32 - BITS_PER_LIMB) * (2**BITS_PER_LIMB - 1)
- // < 2**(32 - BITS_PER_LIMB) * 2**BITS_PER_LIMB == 2**32
- }
- bn_fast_mod(x, prime);
- }
- // Reduces partly reduced x modulo prime
- // Explicitly x = x if x < prime else x - prime
- // Assumes x is partly reduced modulo prime
- // Guarantees x is fully reduced modulo prime
- // Assumes prime is nonzero and normalized
- void bn_mod(bignum256 *x, const bignum256 *prime) {
- uint32_t x_less_prime = bn_is_less(x, prime);
- bignum256 temp = {0};
- bn_subtract(x, prime, &temp);
- bn_cmov(x, x_less_prime, x, &temp);
- memzero(&temp, sizeof(temp));
- }
- // Auxiliary function for bn_multiply
- // res = k * x
- // Assumes k and x are normalized
- // Guarantees res is normalized 18 digit little endian number in base 2**29
- void bn_multiply_long(const bignum256 *k, const bignum256 *x,
- uint32_t res[2 * BN_LIMBS]) {
- // Uses long multiplication in base 2**29, see
- // https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
- uint64_t acc = 0;
- // compute lower half
- for (int i = 0; i < BN_LIMBS; i++) {
- for (int j = 0; j <= i; j++) {
- acc += k->val[j] * (uint64_t)x->val[i - j];
- // acc doesn't overflow 64 bits
- // Proof:
- // acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
- // <= (2**(64 - BITS_PER_LIMB) - 1) +
- // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
- // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
- // <= 2**35 + 9 * 2**58 < 2**64
- }
- res[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 2**35 - 1 == 2**(64 - BITS_PER_LIMB) - 1
- }
- // compute upper half
- for (int i = BN_LIMBS; i < 2 * BN_LIMBS - 1; i++) {
- for (int j = i - BN_LIMBS + 1; j < BN_LIMBS; j++) {
- acc += k->val[j] * (uint64_t)x->val[i - j];
- // acc doesn't overflow 64 bits
- // Proof:
- // acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
- // <= (2**(64 - BITS_PER_LIMB) - 1)
- // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
- // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
- // <= 2**35 + 9 * 2**58 < 2**64
- }
- res[i] = acc & (BN_BASE - 1);
- acc >>= BN_BITS_PER_LIMB;
- // acc < 2**35 == 2**(64 - BITS_PER_LIMB)
- }
- res[2 * BN_LIMBS - 1] = acc;
- }
- // Auxiliary function for bn_multiply
- // Assumes 0 <= d <= 8 == LIMBS - 1
- // Assumes res is normalized and res < 2**(256 + 29*d + 31)
- // Guarantess res in normalized and res < 2 * prime * 2**(29*d)
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- void bn_multiply_reduce_step(uint32_t res[2 * BN_LIMBS], const bignum256 *prime,
- uint32_t d) {
- // clang-format off
- // Computes res = res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d)
- // res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d) < 2 * prime * 2**(BITS_PER_LIMB * d)
- // Proof:
- // res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * prime
- // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - (2**256 - prime))
- // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * 2**256 + res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
- // == (res % 2**(256 + BITS_PER_LIMB * d)) + res // (2**256 + BITS_PER_LIMB * d) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
- // <= (2**(256 + 29*d + 31) % 2**(256 + 29*d)) + (2**(256 + 29*d + 31) - 1) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
- // <= 2**(256 + 29*d) + 2**(256 + 29*d + 31) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
- // == 2**(256 + 29*d) + 2**31 * 2**(29*d) * (2**256 - prime)
- // == 2**(29*d) * (2**256 + 2**31 * (2*256 - prime))
- // <= 2**(29*d) * (2**256 + 2**31 * 2*224)
- // <= 2**(29*d) * (2**256 + 2**255)
- // <= 2**(29*d) * 2 * (2**256 - 2**224)
- // <= 2 * prime * 2**(29*d)
- // clang-format on
- uint32_t coef =
- (res[d + BN_LIMBS - 1] >> (256 - (BN_LIMBS - 1) * BN_BITS_PER_LIMB)) +
- (res[d + BN_LIMBS] << ((BN_LIMBS * BN_BITS_PER_LIMB) - 256));
- // coef == res // 2**(256 + BITS_PER_LIMB * d)
- // coef < 2**31
- // Proof:
- // coef == res // 2**(256 + BITS_PER_LIMB * d)
- // < 2**(256 + 29 * d + 31) // 2**(256 + 29 * d)
- // == 2**31
- const int shift = 31;
- uint64_t acc = 1ull << shift;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += (((uint64_t)(BN_BASE - 1)) << shift) + res[d + i] -
- prime->val[i] * (uint64_t)coef;
- // acc neither overflow 64 bits nor underflow zero
- // Proof:
- // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
- // >= ((BASE - 1) << shift) - prime[i] * coef
- // == 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) *
- // (2**31 - 1)
- // == (2**shift - 2**31 + 1) * (2**BITS_PER_LIMB - 1)
- // == (2**31 - 2**31 + 1) * (2**29 - 1)
- // == 2**29 - 1 > 0
- // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
- // <= acc + ((BASE - 1) << shift) + res[d+i]
- // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
- // + (2*BITS_PER_LIMB - 1)
- // == (2**(64 - BITS_PER_LIMB) - 1) + (2**shift + 1) *
- // (2**BITS_PER_LIMB - 1)
- // == (2**35 - 1) + (2**31 + 1) * (2**29 - 1)
- // <= 2**35 + 2**60 + 2**29 < 2**64
- res[d + i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
- // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + res[d : d + i + 1]
- // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // acc += (((uint64_t)(BASE - 1)) << shift) + res[d + LIMBS];
- // acc >>= BITS_PER_LIMB;
- // assert(acc <= 1ul << shift);
- // clang-format off
- // acc == 1 << shift
- // Proof:
- // acc
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS] >> BITS_PER_LIMB * (LIMBS + 1)
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime >> BITS_PER_LIMB * (LIMBS + 1)
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[d : d + LIMBS + 1] - coef * prime) >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[:d] + BASE**d * res[d : d + LIMBS + 1] - BASE**d * coef * prime)//BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res - BASE**d * coef * prime) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (2 * prime * BASE**d) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (1 << 321) + 2 * 2**256 >> 290
- // == 1 << 31 == 1 << shift
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS + 1] >> BITS_PER_LIMB * (LIMBS + 1)
- // >= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + 0 >> BITS_PER_LIMB * (LIMBS + 1)
- // == 1 << shift
- // clang-format on
- res[d + BN_LIMBS] = 0;
- }
- // Auxiliary function for bn_multiply
- // Partly reduces res and stores both in x and res
- // Assumes res in normalized and res < 2**519
- // Guarantees x is normalized and partly reduced modulo prime
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- void bn_multiply_reduce(bignum256 *x, uint32_t res[2 * BN_LIMBS],
- const bignum256 *prime) {
- for (int i = BN_LIMBS - 1; i >= 0; i--) {
- // res < 2**(256 + 29*i + 31)
- // Proof:
- // if i == LIMBS - 1:
- // res < 2**519
- // == 2**(256 + 29 * 8 + 31)
- // == 2**(256 + 29 * (LIMBS - 1) + 31)
- // else:
- // res < 2 * prime * 2**(29 * (i + 1))
- // <= 2**256 * 2**(29*i + 29) < 2**(256 + 29*i + 31)
- bn_multiply_reduce_step(res, prime, i);
- }
- for (int i = 0; i < BN_LIMBS; i++) {
- x->val[i] = res[i];
- }
- }
- // x = k * x % prime
- // Assumes k, x are normalized, k * x < 2**519
- // Guarantees x is normalized and partly reduced modulo prime
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- void bn_multiply(const bignum256 *k, bignum256 *x, const bignum256 *prime) {
- uint32_t res[2 * BN_LIMBS] = {0};
- bn_multiply_long(k, x, res);
- bn_multiply_reduce(x, res, prime);
- memzero(res, sizeof(res));
- }
- // Partly reduces x modulo prime
- // Assumes limbs of x except the last (the most significant) one are normalized
- // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
- // Guarantees x is normalized and partly reduced modulo prime
- void bn_fast_mod(bignum256 *x, const bignum256 *prime) {
- // Computes x = x - (x // 2**256) * prime
- // x < 2**((LIMBS - 1) * BITS_PER_LIMB + 32) == 2**264
- // x - (x // 2**256) * prime < 2 * prime
- // Proof:
- // x - (x // 2**256) * prime
- // == x - (x // 2**256) * (2**256 - (2**256 - prime))
- // == x - ((x // 2**256) * 2**256) + (x // 2**256) * (2**256 - prime)
- // == (x % prime) + (x // 2**256) * (2**256 - prime)
- // <= prime - 1 + (2**264 // 2**256) * (2**256 - prime)
- // <= 2**256 + 2**8 * 2**224 == 2**256 + 2**232
- // < 2 * (2**256 - 2**224)
- // <= 2 * prime
- // x - (x // 2**256 - 1) * prime < 2 * prime
- // Proof:
- // x - (x // 2**256) * prime + prime
- // == x - (x // 2**256) * (2**256 - (2**256 - prime)) + prime
- // == x - ((x//2**256) * 2**256) + (x//2**256) * (2**256 - prime) + prime
- // == (x % prime) + (x // 2**256) * (2**256 - prime) + prime
- // <= 2 * prime - 1 + (2**264 // 2**256) * (2**256 - prime)
- // <= 2 * prime + 2**8 * 2**224 == 2**256 + 2**232 + 2**256 - 2**224
- // < 2 * (2**256 - 2**224)
- // <= 2 * prime
- uint32_t coef =
- x->val[BN_LIMBS - 1] >> (256 - ((BN_LIMBS - 1) * BN_BITS_PER_LIMB));
- // clang-format off
- // coef == x // 2**256
- // 0 <= coef < 2**((LIMBS - 1) * BITS_PER_LIMB + 32 - 256) == 256
- // Proof:
- //* Let x[[a : b] be the number consisting of a-th to (b-1)-th bit of the number x.
- // x[LIMBS - 1] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
- // == x[[(LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
- // == x[[256 - ((LIMBS - 1) * BITS_PER_LIMB) + (LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]]
- // == x[[256 : (LIMBS - 1) * BITS_PER_LIMB + 32]]
- // == x[[256 : 264]] == x // 2**256
- // clang-format on
- const int shift = 8;
- uint64_t acc = 1ull << shift;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += (((uint64_t)(BN_BASE - 1)) << shift) + x->val[i] -
- prime->val[i] * (uint64_t)coef;
- // acc neither overflows 64 bits nor underflows 0
- // Proof:
- // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
- // >= (BASE - 1 << shift) - prime[i] * coef
- // >= 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) * 255
- // == (2**shift - 255) * (2**BITS_PER_LIMB - 1)
- // == (2**8 - 255) * (2**29 - 1) == 2**29 - 1 >= 0
- // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
- // <= acc + ((BASE - 1) << shift) + x[i]
- // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
- // + (2**32 - 1)
- // == (2**35 - 1) + 2**8 * (2**29 - 1) + 2**32
- // < 2**35 + 2**37 + 2**32 < 2**64
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
- // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + x[:i + 1]
- // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 1 << shift);
- // clang-format off
- // acc == 1 << shift
- // Proof:
- // acc
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + (x - coef * prime) >> BITS_PER_LIMB * LIMBS
- // <= (1 << BITS_PER_LIMB * LIMBS + shift) + (2 * prime) >> BITS_PER_LIMB * LIMBS
- // <= (1 << BITS_PER_LIMB * LIMBS + shift) + 2 * 2**256 >> BITS_PER_LIMB * LIMBS
- // <= 2**269 + 2**257 >> 2**261
- // <= 1 << 8 == 1 << shift
- // acc
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // >= (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
- // <= 1 << 8 == 1 << shift
- // clang-format on
- }
- // res = x**e % prime
- // Assumes both x and e are normalized, x < 2**259
- // Guarantees res is normalized and partly reduced modulo prime
- // Works properly even if &x == &res
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to e
- void bn_power_mod(const bignum256 *x, const bignum256 *e,
- const bignum256 *prime, bignum256 *res) {
- // Uses iterative right-to-left exponentiation by squaring, see
- // https://en.wikipedia.org/wiki/Modular_exponentiation#Right-to-left_binary_method
- bignum256 acc = {0};
- bn_copy(x, &acc);
- bn_one(res);
- for (int i = 0; i < BN_LIMBS; i++) {
- uint32_t limb = e->val[i];
- for (int j = 0; j < BN_BITS_PER_LIMB; j++) {
- // Break if the following bits of the last limb are zero
- if (i == BN_LIMBS - 1 && limb == 0) break;
- if (limb & 1)
- // acc * res < 2**519
- // Proof:
- // acc * res <= max(2**259 - 1, 2 * prime) * (2 * prime)
- // == max(2**259 - 1, 2**257) * 2**257 < 2**259 * 2**257
- // == 2**516 < 2**519
- bn_multiply(&acc, res, prime);
- limb >>= 1;
- // acc * acc < 2**519
- // Proof:
- // acc * acc <= max(2**259 - 1, 2 * prime)**2
- // <= (2**259)**2 == 2**518 < 2**519
- bn_multiply(&acc, &acc, prime);
- }
- // acc == x**(e[:i + 1]) % prime
- }
- memzero(&acc, sizeof(acc));
- }
- // x = sqrt(x) % prime
- // Explicitly x = x**((prime+1)/4) % prime
- // The other root is -sqrt(x)
- // Assumes x is normalized, x < 2**259 and quadratic residuum mod prime
- // Assumes prime is a prime number, prime % 4 == 3, it is normalized and
- // 2**256 - 2**224 <= prime <= 2**256
- // Guarantees x is normalized and fully reduced modulo prime
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to prime
- void bn_sqrt(bignum256 *x, const bignum256 *prime) {
- // Uses the Lagrange formula for the primes of the special form, see
- // http://en.wikipedia.org/wiki/Quadratic_residue#Prime_or_prime_power_modulus
- // If prime % 4 == 3, then sqrt(x) % prime == x**((prime+1)//4) % prime
- assert(prime->val[BN_LIMBS - 1] % 4 == 3);
- // e = (prime + 1) // 4
- bignum256 e = {0};
- bn_copy(prime, &e);
- bn_addi(&e, 1);
- bn_rshift(&e);
- bn_rshift(&e);
- bn_power_mod(x, &e, prime, x);
- bn_mod(x, prime);
- memzero(&e, sizeof(e));
- }
- // a = 1/a % 2**n
- // Assumes a is odd, 1 <= n <= 32
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to n
- uint32_t inverse_mod_power_two(uint32_t a, uint32_t n) {
- // Uses "Explicit Quadratic Modular inverse modulo 2" from section 3.3 of "On
- // Newton-Raphson iteration for multiplicative inverses modulo prime powers"
- // by Jean-Guillaume Dumas, see
- // https://arxiv.org/pdf/1209.6626.pdf
- // 1/a % 2**n
- // = (2-a) * product([1 + (a-1)**(2**i) for i in range(1, floor(log2(n)))])
- uint32_t acc = 2 - a;
- uint32_t f = a - 1;
- // mask = (1 << n) - 1
- uint32_t mask = n == 32 ? 0xFFFFFFFF : (1u << n) - 1;
- for (uint32_t i = 1; i < n; i <<= 1) {
- f = (f * f) & mask;
- acc = (acc * (1 + f)) & mask;
- }
- return acc;
- }
- // x = (x / 2**BITS_PER_LIMB) % prime
- // Assumes both x and prime are normalized
- // Assumes prime is an odd number and normalized
- // Guarantees x is normalized
- // If x is partly reduced (fully reduced) modulo prime,
- // guarantess x will be partly reduced (fully reduced) modulo prime
- void bn_divide_base(bignum256 *x, const bignum256 *prime) {
- // Uses an explicit formula for the modular inverse of power of two
- // (x / 2**n) % prime == (x + ((-x / prime) % 2**n) * prime) // 2**n
- // Proof:
- // (x + ((-x / prime) % 2**n) * prime) % 2**n
- // == (x - x / prime * prime) % 2**n
- // == 0
- // (x + ((-1 / prime) % 2**n) * prime) % prime
- // == x
- // if x < prime:
- // (x + ((-x / prime) % 2**n) * prime) // 2**n
- // <= ((prime - 1) + (2**n - 1) * prime) / 2**n
- // == (2**n * prime - 1) / 2**n == prime - 1 / 2**n < prime
- // if x < 2 * prime:
- // (x + ((-x / prime) % 2**n) * prime) // 2**n
- // <= ((2 * prime - 1) + (2**n - 1) * prime) / 2**n
- // == (2**n * prime + prime - 1) / 2**n
- // == prime + (prime - 1) / 2**n < 2 * prime
- // m = (-x / prime) % 2**BITS_PER_LIMB
- uint32_t m = (x->val[0] * (BN_BASE - inverse_mod_power_two(
- prime->val[0], BN_BITS_PER_LIMB))) &
- BN_LIMB_MASK;
- // m < 2**BITS_PER_LIMB
- uint64_t acc = x->val[0] + (uint64_t)m * prime->val[0];
- acc >>= BN_BITS_PER_LIMB;
- for (int i = 1; i < BN_LIMBS; i++) {
- acc = acc + x->val[i] + (uint64_t)m * prime->val[i];
- // acc does not overflow 64 bits
- // acc == acc + x + m * prime
- // <= 2**(64 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB)
- // 2**(BITS_PER_LIMB) * 2**(BITS_PER_LIMB)
- // <= 2**(2 * BITS_PER_LIMB) + 2**(64 - BITS_PER_LIMB) +
- // 2**(BITS_PER_LIMB)
- // <= 2**58 + 2**35 + 2**29 < 2**64
- x->val[i - 1] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc < 2**35 == 2**(64 - BITS_PER_LIMB)
- // acc == x[:i + 1] + m * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- x->val[BN_LIMBS - 1] = acc;
- assert(acc >> BN_BITS_PER_LIMB == 0);
- // clang-format off
- // acc >> BITS_PER_LIMB == 0
- // Proof:
- // acc >> BITS_PER_LIMB
- // == (x[:LIMB] + m * prime[:LIMB] >> BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * (LIMBS + 1)
- // == x + m * prime >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (2**(BITS_PER_LIMB * LIMBS) - 1) + (2**BITS_PER_LIMB - 1) * (2**(BITS_PER_LIMB * LIMBS) - 1) >> BITS_PER_LIMB * (LIMBS + 1)
- // == 2**(BITS_PER_LIMB * LIMBS) - 1 + 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**(BITS_PER_LIMB * LIMBS) - 2**BITS_PER_LIMB + 1 >> BITS_PER_LIMB * (LIMBS + 1)
- // == 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**BITS_PER_LIMB >> BITS_PER_LIMB * (LIMBS + 1)
- // == 0
- // clang-format on
- }
- #if !USE_INVERSE_FAST
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is normalized
- // Assumes prime is a prime number
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to prime
- static void bn_inverse_slow(bignum256 *x, const bignum256 *prime) {
- // Uses formula 1/x % prime == x**(prime - 2) % prime
- // See https://en.wikipedia.org/wiki/Fermat%27s_little_theorem
- bn_fast_mod(x, prime);
- // e = prime - 2
- bignum256 e = {0};
- bn_read_uint32(2, &e);
- bn_subtract(prime, &e, &e);
- bn_power_mod(x, &e, prime, x);
- bn_mod(x, prime);
- memzero(&e, sizeof(e));
- }
- #endif
- #if false
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is is_normalized
- // Assumes GCD(x, prime) = 1
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to prime and x
- static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
- // "The Almost Montgomery Inverse" from the section 3 of "Constant Time
- // Modular Inversion" by Joppe W. Bos
- // See http://www.joppebos.com/files/CTInversion.pdf
- /*
- u = prime
- v = x & prime
- s = 1
- r = 0
- k = 0
- while v != 1:
- k += 1
- if is_even(u):
- u = u // 2
- s = 2 * s
- elif is_even(v):
- v = v // 2
- r = 2 * r
- elif v < u:
- u = (u - v) // 2
- r = r + s
- s = 2 * s
- else:
- v = (v - u) // 2
- s = r + s
- r = 2 * r
- s = (s / 2**k) % prime
- return s
- */
- if (bn_is_zero(x)) return;
- bn_fast_mod(x, prime);
- bn_mod(x, prime);
- bignum256 u = {0}, v = {0}, r = {0}, s = {0};
- bn_copy(prime, &u);
- bn_copy(x, &v);
- bn_one(&s);
- bn_zero(&r);
- int k = 0;
- while (!bn_is_one(&v)) {
- if ((u.val[0] & 1) == 0) {
- bn_rshift(&u);
- bn_lshift(&s);
- } else if ((v.val[0] & 1) == 0) {
- bn_rshift(&v);
- bn_lshift(&r);
- } else if (bn_is_less(&v, &u)) {
- bn_subtract(&u, &v, &u);
- bn_rshift(&u);
- bn_add(&r, &s);
- bn_lshift(&s);
- } else {
- bn_subtract(&v, &u, &v);
- bn_rshift(&v);
- bn_add(&s, &r);
- bn_lshift(&r);
- }
- k += 1;
- assert(!bn_is_zero(&v)); // assert GCD(x, prime) == 1
- }
- // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
- for (int i = 0; i < k / BITS_PER_LIMB; i++) {
- bn_divide_base(&s, prime);
- }
- // s = s / 2**(k % BITS_PER_LIMB)
- for (int i = 0; i < k % BN_BITS_PER_LIMB; i++) {
- bn_mult_half(&s, prime);
- }
- bn_copy(&s, x);
- memzero(&u, sizeof(u));
- memzero(&v, sizeof(v));
- memzero(&r, sizeof(r));
- memzero(&s, sizeof(s));
- }
- #endif
- #if USE_INVERSE_FAST
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is is_normalized
- // Assumes GCD(x, prime) = 1
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function has constant control flow but not constant memory access flow
- // with regard to prime and x
- static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
- // Custom constant time version of "The Almost Montgomery Inverse" from the
- // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
- // See http://www.joppebos.com/files/CTInversion.pdf
- /*
- u = prime
- v = x % prime
- s = 1
- r = 0
- k = 0
- while v != 1:
- k += 1
- if is_even(u): # b1
- u = u // 2
- s = 2 * s
- elif is_even(v): # b2
- v = v // 2
- r = 2 * r
- elif v < u: # b3
- u = (u - v) // 2
- r = r + s
- s = 2 * s
- else: # b4
- v = (v - u) // 2
- s = r + s
- r = 2 * r
- s = (s / 2**k) % prime
- return s
- */
- bn_fast_mod(x, prime);
- bn_mod(x, prime);
- bignum256 u = {0}, v = {0}, r = {0}, s = {0};
- bn_copy(prime, &u);
- bn_copy(x, &v);
- bn_one(&s);
- bn_zero(&r);
- bignum256 zero = {0};
- bn_zero(&zero);
- int k = 0;
- int finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0,
- b3 = 0, b4 = 0;
- finished = 0;
- for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
- finished = finished | -bn_is_one(&v);
- u_even = -bn_is_even(&u);
- v_even = -bn_is_even(&v);
- v_less_u = -bn_is_less(&v, &u);
- b1 = ~finished & u_even;
- b2 = ~finished & ~b1 & v_even;
- b3 = ~finished & ~b1 & ~b2 & v_less_u;
- b4 = ~finished & ~b1 & ~b2 & ~b3;
- // The ternary operator for pointers with constant control flow
- // BN_INVERSE_FAST_TERNARY(c, t, f) = t if c else f
- // Very nasty hack, sorry for that
- #define BN_INVERSE_FAST_TERNARY(c, t, f) \
- ((void *)(((c) & (uintptr_t)(t)) | (~(c) & (uintptr_t)(f))))
- bn_subtract(BN_INVERSE_FAST_TERNARY(b3, &u, &v),
- BN_INVERSE_FAST_TERNARY(
- b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &v, &u), &zero),
- BN_INVERSE_FAST_TERNARY(b3, &u, &v));
- bn_add(BN_INVERSE_FAST_TERNARY(b3, &r, &s),
- BN_INVERSE_FAST_TERNARY(b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &s, &r),
- &zero));
- bn_rshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &u, &v));
- bn_lshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &s, &r));
- k = k - ~finished;
- }
- // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
- for (int i = 0; i < 2 * BN_LIMBS; i++) {
- // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_divide_base(&r, prime);
- bn_cmov(&s, i < k / BN_BITS_PER_LIMB, &r, &s);
- }
- // s = s / 2**(k % BITS_PER_LIMB)
- for (int i = 0; i < BN_BITS_PER_LIMB; i++) {
- // s = s / 2 % prime if i < k % BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_mult_half(&r, prime);
- bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
- }
- bn_cmov(x, bn_is_zero(x), x, &s);
- memzero(&u, sizeof(u));
- memzero(&v, sizeof(v));
- memzero(&r, sizeof(s));
- memzero(&s, sizeof(s));
- }
- #endif
- #if false
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is is_normalized
- // Assumes GCD(x, prime) = 1
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
- static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
- // Custom constant time version of "The Almost Montgomery Inverse" from the
- // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
- // See http://www.joppebos.com/files/CTInversion.pdf
- /*
- u = prime
- v = x % prime
- s = 1
- r = 0
- k = 0
- while v != 1:
- k += 1
- if is_even(u): # b1
- u = u // 2
- s = 2 * s
- elif is_even(v): # b2
- v = v // 2
- r = 2 * r
- elif v < u: # b3
- u = (u - v) // 2
- r = r + s
- s = 2 * s
- else: # b4
- v = (v - u) // 2
- s = r + s
- r = 2 * r
- s = (s / 2**k) % prime
- return s
- */
- bn_fast_mod(x, prime);
- bn_mod(x, prime);
- bignum256 u = {0}, v = {0}, r = {0}, s = {0};
- bn_copy(prime, &u);
- bn_copy(x, &v);
- bn_one(&s);
- bn_zero(&r);
- bignum256 zero = {0};
- bn_zero(&zero);
- int k = 0;
- uint32_t finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0,
- b3 = 0, b4 = 0;
- finished = 0;
- bignum256 u_half = {0}, v_half = {0}, u_minus_v_half = {0}, v_minus_u_half = {0}, r_plus_s = {0}, r_twice = {0}, s_twice = {0};
- for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
- finished = finished | bn_is_one(&v);
- u_even = bn_is_even(&u);
- v_even = bn_is_even(&v);
- v_less_u = bn_is_less(&v, &u);
- b1 = (finished ^ 1) & u_even;
- b2 = (finished ^ 1) & (b1 ^ 1) & v_even;
- b3 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & v_less_u;
- b4 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & (b3 ^ 1);
- // u_half = u // 2
- bn_copy(&u, &u_half);
- bn_rshift(&u_half);
- // v_half = v // 2
- bn_copy(&v, &v_half);
- bn_rshift(&v_half);
- // u_minus_v_half = (u - v) // 2
- bn_subtract(&u, &v, &u_minus_v_half);
- bn_rshift(&u_minus_v_half);
- // v_minus_u_half = (v - u) // 2
- bn_subtract(&v, &u, &v_minus_u_half);
- bn_rshift(&v_minus_u_half);
- // r_plus_s = r + s
- bn_copy(&r, &r_plus_s);
- bn_add(&r_plus_s, &s);
- // r_twice = 2 * r
- bn_copy(&r, &r_twice);
- bn_lshift(&r_twice);
- // s_twice = 2 * s
- bn_copy(&s, &s_twice);
- bn_lshift(&s_twice);
- bn_cmov(&u, b1, &u_half, &u);
- bn_cmov(&u, b3, &u_minus_v_half, &u);
- bn_cmov(&v, b2, &v_half, &v);
- bn_cmov(&v, b4, &v_minus_u_half, &v);
- bn_cmov(&r, b2 | b4, &r_twice, &r);
- bn_cmov(&r, b3, &r_plus_s, &r);
- bn_cmov(&s, b1 | b3, &s_twice, &s);
- bn_cmov(&s, b4, &r_plus_s, &s);
- k = k + (finished ^ 1);
- }
- // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
- for (int i = 0; i < 2 * BN_LIMBS; i++) {
- // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_divide_base(&r, prime);
- bn_cmov(&s, i < k / BITS_PER_LIMB, &r, &s);
- }
- // s = s / 2**(k % BITS_PER_LIMB)
- for (int i = 0; i < BN_BITS_PER_LIMB; i++) {
- // s = s / 2 % prime if i < k % BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_mult_half(&r, prime);
- bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
- }
- bn_cmov(x, bn_is_zero(x), x, &s);
- memzero(&u, sizeof(u));
- memzero(&v, sizeof(v));
- memzero(&r, sizeof(r));
- memzero(&s, sizeof(s));
- memzero(&u_half, sizeof(u_half));
- memzero(&v_half, sizeof(v_half));
- memzero(&u_minus_v_half, sizeof(u_minus_v_half));
- memzero(&v_minus_u_half, sizeof(v_minus_u_half));
- memzero(&r_twice, sizeof(r_twice));
- memzero(&s_twice, sizeof(s_twice));
- memzero(&r_plus_s, sizeof(r_plus_s));
- }
- #endif
- // Normalizes x
- // Assumes x < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
- // Guarantees x is normalized
- void bn_normalize(bignum256 *x) {
- uint32_t acc = 0;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // acc + x[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
- // == 7 + 2**29 - 1 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= (BN_BITS_PER_LIMB);
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- }
- }
- // x = x + y
- // Assumes x, y are normalized, x + y < 2**(LIMBS*BITS_PER_LIMB) == 2**261
- // Guarantees x is normalized
- // Works properly even if &x == &y
- void bn_add(bignum256 *x, const bignum256 *y) {
- uint32_t acc = 0;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i] + y->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // acc + x[i] + y[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
- // == (2**(32 - BITS_PER_LIMB) - 1) + 2**(BITS_PER_LIMB + 1) - 2
- // == 7 + 2**30 - 2 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == x[:i + 1] + y[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 0); // assert x + y < 2**261
- // acc == 0
- // Proof:
- // acc == x[:LIMBS] + y[:LIMBS] >> LIMBS * BITS_PER_LIMB
- // == x + y >> LIMBS * BITS_PER_LIMB
- // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> LIMBS * BITS_PER_LIMB == 0
- }
- // x = x + y % prime
- // Assumes x, y are normalized
- // Guarantees x is normalized and partly reduced modulo prime
- // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
- void bn_addmod(bignum256 *x, const bignum256 *y, const bignum256 *prime) {
- for (int i = 0; i < BN_LIMBS; i++) {
- x->val[i] += y->val[i];
- // x[i] doesn't overflow 32 bits
- // Proof:
- // x[i] + y[i]
- // <= 2 * (2**BITS_PER_LIMB - 1)
- // == 2**30 - 2 < 2**32
- }
- bn_fast_mod(x, prime);
- }
- // x = x + y
- // Assumes x is normalized
- // Assumes y <= 2**32 - 2**29 == 2**32 - 2**BITS_PER_LIMB and
- // x + y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
- // Guarantees x is normalized
- void bn_addi(bignum256 *x, uint32_t y) {
- // assert(y <= 3758096384); // assert y <= 2**32 - 2**29
- uint32_t acc = y;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // if i == 0:
- // acc + x[i] == y + x[0]
- // <= (2**32 - 2**BITS_PER_LIMB) + (2**BITS_PER_LIMB - 1)
- // == 2**32 - 1 < 2**32
- // else:
- // acc + x[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
- // == 7 + 2**29 - 1 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= (BN_BITS_PER_LIMB);
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == x[:i + 1] + y >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 0); // assert x + y < 2**261
- // acc == 0
- // Proof:
- // acc == x[:LIMBS] + y << LIMBS * BITS_PER_LIMB
- // == x + y << LIMBS * BITS_PER_LIMB
- // <= 2**(LIMBS + BITS_PER_LIMB) - 1 << LIMBS * BITS_PER_LIMB
- // == 0
- }
- // x = x - y % prime
- // Explicitly x = x + prime - y
- // Assumes x, y are normalized
- // Assumes y < prime[0], x + prime - y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
- // Guarantees x is normalized
- // If x is fully reduced modulo prime,
- // guarantess x will be partly reduced modulo prime
- // Assumes prime is nonzero and normalized
- void bn_subi(bignum256 *x, uint32_t y, const bignum256 *prime) {
- assert(y < prime->val[0]);
- // x = x + prime - y
- uint32_t acc = -y;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i] + prime->val[i];
- // acc neither overflows 32 bits nor underflows 0
- // Proof:
- // acc + x[i] + prime[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
- // <= 7 + 2**30 - 2 < 2**32
- // acc + x[i] + prime[i]
- // >= -y + prime[0] >= 0
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == x[:i + 1] + prime[:i + 1] - y >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 0); // assert x + prime - y < 2**261
- // acc == 0
- // Proof:
- // acc == x[:LIMBS] + prime[:LIMBS] - y >> BITS_PER_LIMB * LIMBS
- // == x + prime - y >> BITS_PER_LIMB * LIMBS
- // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> BITS_PER_LIMB * LIMBS == 0
- }
- // res = x - y % prime
- // Explicitly res = x + (2 * prime - y)
- // Assumes x, y are normalized, y is partly reduced
- // Assumes x + 2 * prime - y < 2**261 == 2**(BITS_PER_LIMB * LIMBS)
- // Guarantees res is normalized
- // Assumes prime is nonzero and normalized
- void bn_subtractmod(const bignum256 *x, const bignum256 *y, bignum256 *res,
- const bignum256 *prime) {
- // res = x + (2 * prime - y)
- uint32_t acc = 1;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += (BN_BASE - 1) + x->val[i] + 2 * prime->val[i] - y->val[i];
- // acc neither overflows 32 bits nor underflows 0
- // Proof:
- // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
- // >= (BASE - 1) - y[i]
- // == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) == 0
- // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
- // <= acc + (BASE - 1) + x[i] + 2 * prime[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
- // (2**BITS_PER_LIMB - 1) + 2 * (2**BITS_PER_LIMB - 1)
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 4 * (2**BITS_PER_LIMB - 1)
- // == 7 + 4 * 2**29 - 4 == 2**31 + 3 < 2**32
- res->val[i] = acc & (BN_BASE - 1);
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i+1] - y[:i+1] + 2*prime[:i+1]
- // >> BITS_PER_LIMB * (i+1)
- }
- // assert(acc == 1); // assert x + 2 * prime - y < 2**261
- // clang-format off
- // acc == 1
- // Proof:
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
- // >= 2**(BITS_PER_LIMB * LIMBS) + 0 + 1 >> BITS_PER_LIMB * LIMBS
- // == 1
- // clang-format on
- }
- // res = x - y
- // Assumes x, y are normalized and x >= y
- // Guarantees res is normalized
- // Works properly even if &x == &y or &x == &res or &y == &res or
- // &x == &y == &res
- void bn_subtract(const bignum256 *x, const bignum256 *y, bignum256 *res) {
- uint32_t acc = 1;
- for (int i = 0; i < BN_LIMBS; i++) {
- acc += (BN_BASE - 1) + x->val[i] - y->val[i];
- // acc neither overflows 32 bits nor underflows 0
- // Proof:
- // acc + (BASE - 1) + x[i] - y[i]
- // >= (BASE - 1) - y == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
- // == 0
- // acc + (BASE - 1) + x[i] - y[i]
- // <= acc + (BASE - 1) + x[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
- // (2**BITS_PER_LIMB - 1)
- // == 7 + 2 * 2**29 < 2 **32
- res->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i + 1] - y[:i + 1]
- // >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 1); // assert x >= y
- // clang-format off
- // acc == 1
- // Proof:
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
- // >= 2**(BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * LIMBS
- // == 1
- }
- // q = x // d, r = x % d
- // Assumes x is normalized, 1 <= d <= 61304
- // Guarantees q is normalized
- void bn_long_division(bignum256 *x, uint32_t d, bignum256 *q, uint32_t *r) {
- assert(1 <= d && d < 61304);
- uint32_t acc = 0;
- *r = x->val[BN_LIMBS - 1] % d;
- q->val[BN_LIMBS - 1] = x->val[BN_LIMBS - 1] / d;
- for (int i = BN_LIMBS - 2; i >= 0; i--) {
- acc = *r * (BN_BASE % d) + x->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // r * (BASE % d) + x[i]
- // <= (d - 1) * (d - 1) + (2**BITS_PER_LIMB - 1)
- // == d**2 - 2*d + 2**BITS_PER_LIMB
- // == 61304**2 - 2 * 61304 + 2**29
- // == 3758057808 + 2**29 < 2**32
- q->val[i] = *r * (BN_BASE / d) + (acc / d);
- // q[i] doesn't overflow 32 bits
- // Proof:
- // r * (BASE // d) + (acc // d)
- // <= (d - 1) * (2**BITS_PER_LIMB / d) +
- // ((d**2 - 2*d + 2**BITS_PER_LIMB) / d)
- // <= (d - 1) * (2**BITS_PER_LIMB / d) + (d - 2 + 2**BITS_PER_LIMB / d)
- // == (d - 1 + 1) * (2**BITS_PER_LIMB / d) + d - 2
- // == 2**BITS_PER_LIMB + d - 2 <= 2**29 + 61304 < 2**32
- // q[i] == (r * BASE + x[i]) // d
- // Proof:
- // q[i] == r * (BASE // d) + (acc // d)
- // == r * (BASE // d) + (r * (BASE % d) + x[i]) // d
- // == (r * d * (BASE // d) + r * (BASE % d) + x[i]) // d
- // == (r * (d * (BASE // d) + (BASE % d)) + x[i]) // d
- // == (r * BASE + x[i]) // d
- // q[i] < 2**BITS_PER_LIMB
- // Proof:
- // q[i] == (r * BASE + x[i]) // d
- // <= ((d - 1) * 2**BITS_PER_LIMB + (2**BITS_PER_LIMB - 1)) / d
- // == (d * 2**BITS_PER_LIMB - 1) / d == 2**BITS_PER_LIMB - 1 / d
- // < 2**BITS_PER_LIMB
- *r = acc % d;
- // r == (r * BASE + x[i]) % d
- // Proof:
- // r == acc % d == (r * (BASE % d) + x[i]) % d
- // == (r * BASE + x[i]) % d
- // x[:i] == q[:i] * d + r
- }
- }
- // x = x // 58, r = x % 58
- // Assumes x is normalized
- // Guarantees x is normalized
- void bn_divmod58(bignum256 *x, uint32_t *r) { bn_long_division(x, 58, x, r); }
- // x = x // 1000, r = x % 1000
- // Assumes x is normalized
- // Guarantees x is normalized
- void bn_divmod1000(bignum256 *x, uint32_t *r) {
- bn_long_division(x, 1000, x, r);
- }
- // x = x // 10, r = x % 10
- // Assumes x is normalized
- // Guarantees x is normalized
- void bn_divmod10(bignum256 *x, uint32_t *r) { bn_long_division(x, 10, x, r); }
- // Formats amount
- // Assumes amount is normalized
- // Assumes prefix and suffix are null-terminated strings
- // Assumes output is an array of length output_length
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to any its argument
- size_t bn_format(const bignum256 *amount, const char *prefix, const char *suffix, unsigned int decimals, int exponent, bool trailing, char thousands, char *output, size_t output_length) {
- /*
- Python prototype of the function:
- def format(amount, prefix, suffix, decimals, exponent, trailing, thousands):
- if exponent >= 0:
- amount *= 10**exponent
- else:
- amount //= 10 ** (-exponent)
- d = pow(10, decimals)
- integer_part = amount // d
- integer_str = f"{integer_part:,}".replace(",", thousands or "")
- if decimals:
- decimal_part = amount % d
- decimal_str = f".{decimal_part:0{decimals}d}"
- if not trailing:
- decimal_str = decimal_str.rstrip("0").rstrip(".")
- else:
- decimal_str = ""
- return prefix + integer_str + decimal_str + suffix
- */
- // Auxiliary macro for bn_format
- // If enough space adds one character to output starting from the end
- #define BN_FORMAT_ADD_OUTPUT_CHAR(c) \
- { \
- --position; \
- if (output <= position && position < output + output_length) { \
- *position = (c); \
- } else { \
- memset(output, '\0', output_length); \
- return 0; \
- } \
- }
- bignum256 temp = {0};
- bn_copy(amount, &temp);
- uint32_t digit = 0;
- char *position = output + output_length;
- // Add string ending character
- BN_FORMAT_ADD_OUTPUT_CHAR('\0');
- // Add suffix
- size_t suffix_length = suffix ? strlen(suffix) : 0;
- for (int i = suffix_length - 1; i >= 0; --i)
- BN_FORMAT_ADD_OUTPUT_CHAR(suffix[i])
- // amount //= 10**exponent
- for (; exponent < 0; ++exponent) {
- // if temp == 0, there is no need to divide it by 10 anymore
- if (bn_is_zero(&temp)) {
- exponent = 0;
- break;
- }
- bn_divmod10(&temp, &digit);
- }
- // exponent >= 0 && decimals >= 0
- bool fractional_part = false; // is fractional-part of amount present
- { // Add fractional-part digits of amount
- // Add trailing zeroes
- unsigned int trailing_zeros = decimals < (unsigned int) exponent ? decimals : (unsigned int) exponent;
- // When casting a negative int to unsigned int, UINT_MAX is added to the int before
- // Since exponent >= 0, the value remains unchanged
- decimals -= trailing_zeros;
- exponent -= trailing_zeros;
- if (trailing && trailing_zeros) {
- fractional_part = true;
- for (; trailing_zeros > 0; --trailing_zeros)
- BN_FORMAT_ADD_OUTPUT_CHAR('0')
- }
- // exponent == 0 || decimals == 0
- // Add significant digits and leading zeroes
- for (; decimals > 0; --decimals) {
- bn_divmod10(&temp, &digit);
- if (fractional_part || digit || trailing) {
- fractional_part = true;
- BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
- }
- else if (bn_is_zero(&temp)) {
- // We break since the remaining digits are zeroes and fractional_part == trailing == false
- decimals = 0;
- break;
- }
- }
- // decimals == 0
- }
- if (fractional_part) {
- BN_FORMAT_ADD_OUTPUT_CHAR('.')
- }
- { // Add integer-part digits of amount
- // Add trailing zeroes
- int digits = 0;
- if (!bn_is_zero(&temp)) {
- for (; exponent > 0; --exponent) {
- ++digits;
- BN_FORMAT_ADD_OUTPUT_CHAR('0')
- if (thousands != 0 && digits % 3 == 0) {
- BN_FORMAT_ADD_OUTPUT_CHAR(thousands)
- }
- }
- }
- // decimals == 0 && exponent == 0
- // Add significant digits
- bool is_zero = false;
- do {
- ++digits;
- bn_divmod10(&temp, &digit);
- is_zero = bn_is_zero(&temp);
- BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
- if (thousands != 0 && !is_zero && digits % 3 == 0) {
- BN_FORMAT_ADD_OUTPUT_CHAR(thousands)
- }
- } while (!is_zero);
- }
- // Add prefix
- size_t prefix_length = prefix ? strlen(prefix) : 0;
- for (int i = prefix_length - 1; i >= 0; --i)
- BN_FORMAT_ADD_OUTPUT_CHAR(prefix[i])
- // Move formatted amount to the start of output
- int length = output - position + output_length;
- memmove(output, position, length);
- return length - 1;
- }
- #if USE_BN_PRINT
- // Prints x in hexadecimal
- // Assumes x is normalized and x < 2**256
- void bn_print(const bignum256 *x) {
- printf("%06x", x->val[8]);
- printf("%08x", ((x->val[7] << 3) | (x->val[6] >> 26)));
- printf("%07x", ((x->val[6] << 2) | (x->val[5] >> 27)) & 0x0FFFFFFF);
- printf("%07x", ((x->val[5] << 1) | (x->val[4] >> 28)) & 0x0FFFFFFF);
- printf("%07x", x->val[4] & 0x0FFFFFFF);
- printf("%08x", ((x->val[3] << 3) | (x->val[2] >> 26)));
- printf("%07x", ((x->val[2] << 2) | (x->val[1] >> 27)) & 0x0FFFFFFF);
- printf("%07x", ((x->val[1] << 1) | (x->val[0] >> 28)) & 0x0FFFFFFF);
- printf("%07x", x->val[0] & 0x0FFFFFFF);
- }
- // Prints comma separated list of limbs of x
- void bn_print_raw(const bignum256 *x) {
- for (int i = 0; i < BN_LIMBS - 1; i++) {
- printf("0x%08x, ", x->val[i]);
- }
- printf("0x%08x", x->val[BN_LIMBS - 1]);
- }
- #endif
- #if USE_INVERSE_FAST
- void bn_inverse(bignum256 *x, const bignum256 *prime) {
- bn_inverse_fast(x, prime);
- }
- #else
- void bn_inverse(bignum256 *x, const bignum256 *prime) {
- bn_inverse_slow(x, prime);
- }
- #endif
|