| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834 |
- /**
- * Copyright (c) 2013-2014 Tomas Dzetkulic
- * Copyright (c) 2013-2014 Pavol Rusnak
- * Copyright (c) 2015 Jochen Hoenicke
- * Copyright (c) 2016 Alex Beregszaszi
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included
- * in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
- * OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
- #include "bignum.h"
- #include <assert.h>
- #include <stdint.h>
- #include <stdio.h>
- #include <string.h>
- #include "memzero.h"
- #include "script.h"
- /*
- This library implements 256-bit numbers arithmetic.
- An unsigned 256-bit number is represented by a bignum256 structure, that is an
- array of nine 32-bit values called limbs. Limbs are digits of the number in
- the base 2**29 representation in the little endian order. This means that
- bignum256 x;
- represents the value
- sum([x[i] * 2**(29*i) for i in range(9)).
- A limb of a bignum256 is *normalized* iff it's less than 2**29.
- A bignum256 is *normalized* iff every its limb is normalized.
- A number is *fully reduced modulo p* iff it is less than p.
- A number is *partly reduced modulo p* iff is is less than 2*p.
- The number p is usually a prime number such that 2^256 - 2^224 <= p <= 2^256.
- All functions except bn_fast_mod expect that all their bignum256 inputs are
- normalized. (The function bn_fast_mod allows the input number to have the
- most significant limb unnormalized). All bignum256 outputs of all functions
- are guaranteed to be normalized.
- A number can be partly reduced with bn_fast_mod, a partly reduced number can
- be fully reduced with bn_mod.
- A function has *constant control flow with regard to its argument* iff the
- order in which instructions of the function are executed doesn't depend on the
- value of the argument.
- A function has *constant memory access flow with regard to its argument* iff
- the memory addresses that are acessed and the order in which they are accessed
- don't depend on the value of the argument.
- A function *has contant control (memory access) flow* iff it has constant
- control (memory access) flow with regard to all its arguments.
- The following function has contant control flow with regard to its arugment
- n, however is doesn't have constant memory access flow with regard to it:
- void (int n, int *a) }
- a[0] = 0;
- a[n] = 0; // memory address reveals the value of n
- }
- Unless stated otherwise all functions are supposed to have both constant
- control flow and constant memory access flow.
- */
- #define BN_MAX_DECIMAL_DIGITS 79 // floor(log(2**(LIMBS * BITS_PER_LIMB), 10)) + 1
- // out_number = (bignum256) in_number
- // Assumes in_number is a raw bigendian 256-bit number
- // Guarantees out_number is normalized
- void bn_read_be(const uint8_t* in_number, bignum256* out_number) {
- uint32_t temp = 0;
- for(int i = 0; i < BN_LIMBS - 1; i++) {
- uint32_t limb = read_be(in_number + (BN_LIMBS - 2 - i) * 4);
- temp |= limb << (BN_EXTRA_BITS * i);
- out_number->val[i] = temp & BN_LIMB_MASK;
- temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
- }
- out_number->val[BN_LIMBS - 1] = temp;
- }
- // out_number = (256BE) in_number
- // Assumes in_number < 2**256
- // Guarantess out_number is a raw bigendian 256-bit number
- void bn_write_be(const bignum256* in_number, uint8_t* out_number) {
- uint32_t temp = in_number->val[BN_LIMBS - 1];
- for(int i = BN_LIMBS - 2; i >= 0; i--) {
- uint32_t limb = in_number->val[i];
- temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) | (limb >> (BN_EXTRA_BITS * i));
- write_be(out_number + (BN_LIMBS - 2 - i) * 4, temp);
- temp = limb;
- }
- }
- // out_number = (bignum256) in_number
- // Assumes in_number is a raw little endian 256-bit number
- // Guarantees out_number is normalized
- void bn_read_le(const uint8_t* in_number, bignum256* out_number) {
- uint32_t temp = 0;
- for(int i = 0; i < BN_LIMBS - 1; i++) {
- uint32_t limb = read_le(in_number + i * 4);
- temp |= limb << (BN_EXTRA_BITS * i);
- out_number->val[i] = temp & BN_LIMB_MASK;
- temp = limb >> (32 - BN_EXTRA_BITS * (i + 1));
- }
- out_number->val[BN_LIMBS - 1] = temp;
- }
- // out_number = (256LE) in_number
- // Assumes in_number < 2**256
- // Guarantess out_number is a raw little endian 256-bit number
- void bn_write_le(const bignum256* in_number, uint8_t* out_number) {
- uint32_t temp = in_number->val[BN_LIMBS - 1];
- for(int i = BN_LIMBS - 2; i >= 0; i--) {
- uint32_t limb = in_number->val[i];
- temp = (temp << (BN_BITS_PER_LIMB - BN_EXTRA_BITS * i)) | (limb >> (BN_EXTRA_BITS * i));
- write_le(out_number + i * 4, temp);
- temp = limb;
- }
- }
- // out_number = (bignum256) in_number
- // Guarantees out_number is normalized
- void bn_read_uint32(uint32_t in_number, bignum256* out_number) {
- out_number->val[0] = in_number & BN_LIMB_MASK;
- out_number->val[1] = in_number >> BN_BITS_PER_LIMB;
- for(uint32_t i = 2; i < BN_LIMBS; i++) out_number->val[i] = 0;
- }
- // out_number = (bignum256) in_number
- // Guarantees out_number is normalized
- void bn_read_uint64(uint64_t in_number, bignum256* out_number) {
- out_number->val[0] = in_number & BN_LIMB_MASK;
- out_number->val[1] = (in_number >>= BN_BITS_PER_LIMB) & BN_LIMB_MASK;
- out_number->val[2] = in_number >> BN_BITS_PER_LIMB;
- for(uint32_t i = 3; i < BN_LIMBS; i++) out_number->val[i] = 0;
- }
- // Returns the bitsize of x
- // Assumes x is normalized
- // The function doesn't have neither constant control flow nor constant memory
- // access flow
- int bn_bitcount(const bignum256* x) {
- for(int i = BN_LIMBS - 1; i >= 0; i--) {
- uint32_t limb = x->val[i];
- if(limb != 0) {
- // __builtin_clz returns the number of leading zero bits starting at the
- // most significant bit position
- return i * BN_BITS_PER_LIMB + (32 - __builtin_clz(limb));
- }
- }
- return 0;
- }
- // Returns the number of decimal digits of x; if x is 0, returns 1
- // Assumes x is normalized
- // The function doesn't have neither constant control flow nor constant memory
- // access flow
- unsigned int bn_digitcount(const bignum256* x) {
- bignum256 val = {0};
- bn_copy(x, &val);
- unsigned int digits = 1;
- for(unsigned int i = 0; i < BN_MAX_DECIMAL_DIGITS; i += 3) {
- uint32_t limb = 0;
- bn_divmod1000(&val, &limb);
- if(limb >= 100) {
- digits = i + 3;
- } else if(limb >= 10) {
- digits = i + 2;
- } else if(limb >= 1) {
- digits = i + 1;
- }
- }
- memzero(&val, sizeof(val));
- return digits;
- }
- // x = 0
- // Guarantees x is normalized
- void bn_zero(bignum256* x) {
- for(int i = 0; i < BN_LIMBS; i++) {
- x->val[i] = 0;
- }
- }
- // x = 1
- // Guarantees x is normalized
- void bn_one(bignum256* x) {
- x->val[0] = 1;
- for(int i = 1; i < BN_LIMBS; i++) {
- x->val[i] = 0;
- }
- }
- // Returns x == 0
- // Assumes x is normalized
- int bn_is_zero(const bignum256* x) {
- uint32_t result = 0;
- for(int i = 0; i < BN_LIMBS; i++) {
- result |= x->val[i];
- }
- return !result;
- }
- // Returns x == 1
- // Assumes x is normalized
- int bn_is_one(const bignum256* x) {
- uint32_t result = x->val[0] ^ 1;
- for(int i = 1; i < BN_LIMBS; i++) {
- result |= x->val[i];
- }
- return !result;
- }
- // Returns x < y
- // Assumes x, y are normalized
- int bn_is_less(const bignum256* x, const bignum256* y) {
- uint32_t res1 = 0;
- uint32_t res2 = 0;
- for(int i = BN_LIMBS - 1; i >= 0; i--) {
- res1 = (res1 << 1) | (x->val[i] < y->val[i]);
- res2 = (res2 << 1) | (x->val[i] > y->val[i]);
- }
- return res1 > res2;
- }
- // Returns x == y
- // Assumes x, y are normalized
- int bn_is_equal(const bignum256* x, const bignum256* y) {
- uint32_t result = 0;
- for(int i = 0; i < BN_LIMBS; i++) {
- result |= x->val[i] ^ y->val[i];
- }
- return !result;
- }
- // res = cond if truecase else falsecase
- // Assumes cond is either 0 or 1
- // Works properly even if &res == &truecase or &res == &falsecase or
- // &truecase == &falsecase or &res == &truecase == &falsecase
- void bn_cmov(
- bignum256* res,
- volatile uint32_t cond,
- const bignum256* truecase,
- const bignum256* falsecase) {
- // Intentional use of bitwise OR operator to ensure constant-time
- assert((int)(cond == 1) | (int)(cond == 0));
- uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
- uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
- for(int i = 0; i < BN_LIMBS; i++) {
- res->val[i] = (truecase->val[i] & tmask) | (falsecase->val[i] & fmask);
- }
- }
- // x = -x % prime if cond else x,
- // Explicitly x = (3 * prime - x if x > prime else 2 * prime - x) if cond else
- // else (x if x > prime else x + prime)
- // Assumes x is normalized and partly reduced
- // Assumes cond is either 1 or 0
- // Guarantees x is normalized
- // Assumes prime is normalized and
- // 0 < prime < 2**260 == 2**(BITS_PER_LIMB * LIMBS - 1)
- void bn_cnegate(volatile uint32_t cond, bignum256* x, const bignum256* prime) {
- // Intentional use of bitwise OR operator to ensure constant time
- assert((int)(cond == 1) | (int)(cond == 0));
- uint32_t tmask = -cond; // tmask = 0xFFFFFFFF if cond else 0x00000000
- uint32_t fmask = ~tmask; // fmask = 0x00000000 if cond else 0xFFFFFFFF
- bn_mod(x, prime);
- // x < prime
- uint32_t acc1 = 1;
- uint32_t acc2 = 0;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc1 += (BN_BASE - 1) + 2 * prime->val[i] - x->val[i];
- // acc1 neither overflows 32 bits nor underflows 0
- // Proof:
- // acc1 + (BASE - 1) + 2 * prime[i] - x[i]
- // >= (BASE - 1) - x >= (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
- // == 0
- // acc1 + (BASE - 1) + 2 * prime[i] - x[i]
- // <= acc1 + (BASE - 1) + 2 * prime[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1) +
- // (2**BITS_PER_LIMB - 1)
- // == 7 + 3 * 2**29 < 2**32
- acc2 += prime->val[i] + x->val[i];
- // acc2 doesn't overflow 32 bits
- // Proof:
- // acc2 + prime[i] + x[i]
- // <= 2**(32 - BITS_PER_LIMB) - 1 + 2 * (2**BITS_PER_LIMB - 1)
- // == 2**(32 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB + 1) - 2
- // == 2**30 + 5 < 2**32
- // x = acc1 & LIMB_MASK if cond else acc2 & LIMB_MASK
- x->val[i] = ((acc1 & tmask) | (acc2 & fmask)) & BN_LIMB_MASK;
- acc1 >>= BN_BITS_PER_LIMB;
- // acc1 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc1 == 2**(BITS_PER_LIMB * (i + 1)) + 2 * prime[:i + 1] - x[:i + 1]
- // >> BITS_PER_LIMB * (i + 1)
- acc2 >>= BN_BITS_PER_LIMB;
- // acc2 <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc2 == prime[:i + 1] + x[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc1 == 1); // assert prime <= 2**260
- // assert(acc2 == 0);
- // clang-format off
- // acc1 == 1
- // Proof:
- // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * prime >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 2 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc1 == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime[:LIMBS] - x[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + 2 * prime - x >> BITS_PER_LIMB * LIMBS
- // >= 2**(BITS_PER_LIMB * LIMBS) + 0 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc2 == 0
- // Proof:
- // acc2 == prime[:LIMBS] + x[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == prime + x >> BITS_PER_LIMB * LIMBS
- // <= 2 * prime - 1 >> BITS_PER_LIMB * LIMBS
- // <= 2 * (2**(BITS_PER_LIMB * LIMBS - 1) - 1) - 1 >> 261
- // == 2**(BITS_PER_LIMB * LIMBS) - 3 >> BITS_PER_LIMB * LIMBS
- // == 0
- // clang-format on
- }
- // x <<= 1
- // Assumes x is normalized, x < 2**260 == 2**(LIMBS*BITS_PER_LIMB - 1)
- // Guarantees x is normalized
- void bn_lshift(bignum256* x) {
- for(int i = BN_LIMBS - 1; i > 0; i--) {
- x->val[i] = ((x->val[i] << 1) & BN_LIMB_MASK) | (x->val[i - 1] >> (BN_BITS_PER_LIMB - 1));
- }
- x->val[0] = (x->val[0] << 1) & BN_LIMB_MASK;
- }
- // x >>= 1, i.e. x = floor(x/2)
- // Assumes x is normalized
- // Guarantees x is normalized
- // If x is partly reduced (fully reduced) modulo prime,
- // guarantess x will be partly reduced (fully reduced) modulo prime
- void bn_rshift(bignum256* x) {
- for(int i = 0; i < BN_LIMBS - 1; i++) {
- x->val[i] = (x->val[i] >> 1) | ((x->val[i + 1] & 1) << (BN_BITS_PER_LIMB - 1));
- }
- x->val[BN_LIMBS - 1] >>= 1;
- }
- // Sets i-th least significant bit (counting from zero)
- // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
- // Guarantees x is normalized
- // The function has constant control flow but not constant memory access flow
- // with regard to i
- void bn_setbit(bignum256* x, uint16_t i) {
- assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
- x->val[i / BN_BITS_PER_LIMB] |= (1u << (i % BN_BITS_PER_LIMB));
- }
- // clears i-th least significant bit (counting from zero)
- // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
- // Guarantees x is normalized
- // The function has constant control flow but not constant memory access flow
- // with regard to i
- void bn_clearbit(bignum256* x, uint16_t i) {
- assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
- x->val[i / BN_BITS_PER_LIMB] &= ~(1u << (i % BN_BITS_PER_LIMB));
- }
- // returns i-th least significant bit (counting from zero)
- // Assumes x is normalized and 0 <= i < 261 == LIMBS*BITS_PER_LIMB
- // The function has constant control flow but not constant memory access flow
- // with regard to i
- uint32_t bn_testbit(const bignum256* x, uint16_t i) {
- assert(i < BN_LIMBS * BN_BITS_PER_LIMB);
- return (x->val[i / BN_BITS_PER_LIMB] >> (i % BN_BITS_PER_LIMB)) & 1;
- }
- // res = x ^ y
- // Assumes x, y are normalized
- // Guarantees res is normalized
- // Works properly even if &res == &x or &res == &y or &res == &x == &y
- void bn_xor(bignum256* res, const bignum256* x, const bignum256* y) {
- for(int i = 0; i < BN_LIMBS; i++) {
- res->val[i] = x->val[i] ^ y->val[i];
- }
- }
- // x = x / 2 % prime
- // Explicitly x = x / 2 if is_even(x) else (x + prime) / 2
- // Assumes x is normalized, x + prime < 261 == LIMBS * BITS_PER_LIMB
- // Guarantees x is normalized
- // If x is partly reduced (fully reduced) modulo prime,
- // guarantess x will be partly reduced (fully reduced) modulo prime
- // Assumes prime is an odd number and normalized
- void bn_mult_half(bignum256* x, const bignum256* prime) {
- // x = x / 2 if is_even(x) else (x + prime) / 2
- uint32_t x_is_odd_mask = -(x->val[0] & 1); // x_is_odd_mask = 0xFFFFFFFF if is_odd(x) else 0
- uint32_t acc = (x->val[0] + (prime->val[0] & x_is_odd_mask)) >> 1;
- // acc < 2**BITS_PER_LIMB
- // Proof:
- // acc == x[0] + prime[0] & x_is_odd_mask >> 1
- // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1) >> 1
- // == 2**(BITS_PER_LIMB + 1) - 2 >> 1
- // < 2**(BITS_PER_LIMB)
- for(int i = 0; i < BN_LIMBS - 1; i++) {
- uint32_t temp = (x->val[i + 1] + (prime->val[i + 1] & x_is_odd_mask));
- // temp < 2**(BITS_PER_LIMB + 1)
- // Proof:
- // temp == x[i + 1] + val[i + 1] & x_is_odd_mask
- // <= (2**(BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB) - 1)
- // < 2**(BITS_PER_LIMB + 1)
- acc += (temp & 1) << (BN_BITS_PER_LIMB - 1);
- // acc doesn't overflow 32 bits
- // Proof:
- // acc + (temp & 1 << BITS_PER_LIMB - 1)
- // <= 2**(BITS_PER_LIMB + 1) + 2**(BITS_PER_LIMB - 1)
- // <= 2**30 + 2**28 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- acc += temp >> 1;
- // acc < 2**(BITS_PER_LIMB + 1)
- // Proof:
- // acc + (temp >> 1)
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**(BITS_PER_LIMB + 1) - 1 >> 1)
- // == 7 + 2**(BITS_PER_LIMB) - 1 < 2**(BITS_PER_LIMB + 1)
- // acc == x[:i+2]+(prime[:i+2] & x_is_odd_mask) >> BITS_PER_LIMB * (i+1)
- }
- x->val[BN_LIMBS - 1] = acc;
- // assert(acc >> BITS_PER_LIMB == 0);
- // acc >> BITS_PER_LIMB == 0
- // Proof:
- // acc
- // == x[:LIMBS] + (prime[:LIMBS] & x_is_odd_mask) >> BITS_PER_LIMB*LIMBS
- // == x + (prime & x_is_odd_mask) >> BITS_PER_LIMB * LIMBS
- // <= x + prime >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // == 0
- }
- // x = x * k % prime
- // Assumes x is normalized, 0 <= k <= 8 = 2**(32 - BITS_PER_LIMB)
- // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
- // Guarantees x is normalized and partly reduced modulo prime
- void bn_mult_k(bignum256* x, uint8_t k, const bignum256* prime) {
- assert(k <= 8);
- for(int i = 0; i < BN_LIMBS; i++) {
- x->val[i] = k * x->val[i];
- // x[i] doesn't overflow 32 bits
- // k * x[i] <= 2**(32 - BITS_PER_LIMB) * (2**BITS_PER_LIMB - 1)
- // < 2**(32 - BITS_PER_LIMB) * 2**BITS_PER_LIMB == 2**32
- }
- bn_fast_mod(x, prime);
- }
- // Reduces partly reduced x modulo prime
- // Explicitly x = x if x < prime else x - prime
- // Assumes x is partly reduced modulo prime
- // Guarantees x is fully reduced modulo prime
- // Assumes prime is nonzero and normalized
- void bn_mod(bignum256* x, const bignum256* prime) {
- uint32_t x_less_prime = bn_is_less(x, prime);
- bignum256 temp = {0};
- bn_subtract(x, prime, &temp);
- bn_cmov(x, x_less_prime, x, &temp);
- memzero(&temp, sizeof(temp));
- }
- // Auxiliary function for bn_multiply
- // res = k * x
- // Assumes k and x are normalized
- // Guarantees res is normalized 18 digit little endian number in base 2**29
- void bn_multiply_long(const bignum256* k, const bignum256* x, uint32_t res[2 * BN_LIMBS]) {
- // Uses long multiplication in base 2**29, see
- // https://en.wikipedia.org/wiki/Multiplication_algorithm#Long_multiplication
- uint64_t acc = 0;
- // compute lower half
- for(int i = 0; i < BN_LIMBS; i++) {
- for(int j = 0; j <= i; j++) {
- acc += k->val[j] * (uint64_t)x->val[i - j];
- // acc doesn't overflow 64 bits
- // Proof:
- // acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
- // <= (2**(64 - BITS_PER_LIMB) - 1) +
- // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
- // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
- // <= 2**35 + 9 * 2**58 < 2**64
- }
- res[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 2**35 - 1 == 2**(64 - BITS_PER_LIMB) - 1
- }
- // compute upper half
- for(int i = BN_LIMBS; i < 2 * BN_LIMBS - 1; i++) {
- for(int j = i - BN_LIMBS + 1; j < BN_LIMBS; j++) {
- acc += k->val[j] * (uint64_t)x->val[i - j];
- // acc doesn't overflow 64 bits
- // Proof:
- // acc <= acc + sum([k[j] * x[i-j] for j in range(i)])
- // <= (2**(64 - BITS_PER_LIMB) - 1)
- // LIMBS * (2**BITS_PER_LIMB - 1) * (2**BITS_PER_LIMB - 1)
- // == (2**35 - 1) + 9 * (2**29 - 1) * (2**29 - 1)
- // <= 2**35 + 9 * 2**58 < 2**64
- }
- res[i] = acc & (BN_BASE - 1);
- acc >>= BN_BITS_PER_LIMB;
- // acc < 2**35 == 2**(64 - BITS_PER_LIMB)
- }
- res[2 * BN_LIMBS - 1] = acc;
- }
- // Auxiliary function for bn_multiply
- // Assumes 0 <= d <= 8 == LIMBS - 1
- // Assumes res is normalized and res < 2**(256 + 29*d + 31)
- // Guarantess res in normalized and res < 2 * prime * 2**(29*d)
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- void bn_multiply_reduce_step(uint32_t res[2 * BN_LIMBS], const bignum256* prime, uint32_t d) {
- // clang-format off
- // Computes res = res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d)
- // res - (res // 2**(256 + BITS_PER_LIMB * d)) * prime * 2**(BITS_PER_LIMB * d) < 2 * prime * 2**(BITS_PER_LIMB * d)
- // Proof:
- // res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * prime
- // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - (2**256 - prime))
- // == res - res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * 2**256 + res // (2**(256 + BITS_PER_LIMB * d)) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
- // == (res % 2**(256 + BITS_PER_LIMB * d)) + res // (2**256 + BITS_PER_LIMB * d) * 2**(BITS_PER_LIMB * d) * (2**256 - prime)
- // <= (2**(256 + 29*d + 31) % 2**(256 + 29*d)) + (2**(256 + 29*d + 31) - 1) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
- // <= 2**(256 + 29*d) + 2**(256 + 29*d + 31) / (2**256 + 29*d) * 2**(29*d) * (2**256 - prime)
- // == 2**(256 + 29*d) + 2**31 * 2**(29*d) * (2**256 - prime)
- // == 2**(29*d) * (2**256 + 2**31 * (2*256 - prime))
- // <= 2**(29*d) * (2**256 + 2**31 * 2*224)
- // <= 2**(29*d) * (2**256 + 2**255)
- // <= 2**(29*d) * 2 * (2**256 - 2**224)
- // <= 2 * prime * 2**(29*d)
- // clang-format on
- uint32_t coef = (res[d + BN_LIMBS - 1] >> (256 - (BN_LIMBS - 1) * BN_BITS_PER_LIMB)) +
- (res[d + BN_LIMBS] << ((BN_LIMBS * BN_BITS_PER_LIMB) - 256));
- // coef == res // 2**(256 + BITS_PER_LIMB * d)
- // coef < 2**31
- // Proof:
- // coef == res // 2**(256 + BITS_PER_LIMB * d)
- // < 2**(256 + 29 * d + 31) // 2**(256 + 29 * d)
- // == 2**31
- const int shift = 31;
- uint64_t acc = 1ull << shift;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += (((uint64_t)(BN_BASE - 1)) << shift) + res[d + i] - prime->val[i] * (uint64_t)coef;
- // acc neither overflow 64 bits nor underflow zero
- // Proof:
- // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
- // >= ((BASE - 1) << shift) - prime[i] * coef
- // == 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) *
- // (2**31 - 1)
- // == (2**shift - 2**31 + 1) * (2**BITS_PER_LIMB - 1)
- // == (2**31 - 2**31 + 1) * (2**29 - 1)
- // == 2**29 - 1 > 0
- // acc + ((BASE - 1) << shift) + res[d + i] - prime[i] * coef
- // <= acc + ((BASE - 1) << shift) + res[d+i]
- // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
- // + (2*BITS_PER_LIMB - 1)
- // == (2**(64 - BITS_PER_LIMB) - 1) + (2**shift + 1) *
- // (2**BITS_PER_LIMB - 1)
- // == (2**35 - 1) + (2**31 + 1) * (2**29 - 1)
- // <= 2**35 + 2**60 + 2**29 < 2**64
- res[d + i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
- // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + res[d : d + i + 1]
- // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // acc += (((uint64_t)(BASE - 1)) << shift) + res[d + LIMBS];
- // acc >>= BITS_PER_LIMB;
- // assert(acc <= 1ul << shift);
- // clang-format off
- // acc == 1 << shift
- // Proof:
- // acc
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS] >> BITS_PER_LIMB * (LIMBS + 1)
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime >> BITS_PER_LIMB * (LIMBS + 1)
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[d : d + LIMBS + 1] - coef * prime) >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res[:d] + BASE**d * res[d : d + LIMBS + 1] - BASE**d * coef * prime)//BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (res - BASE**d * coef * prime) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + (2 * prime * BASE**d) // BASE**d >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (1 << 321) + 2 * 2**256 >> 290
- // == 1 << 31 == 1 << shift
- // == (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + res[d : d + LIMBS + 1] - coef * prime[:LIMBS + 1] >> BITS_PER_LIMB * (LIMBS + 1)
- // >= (1 << BITS_PER_LIMB * (LIMBS + 1) + shift) + 0 >> BITS_PER_LIMB * (LIMBS + 1)
- // == 1 << shift
- // clang-format on
- res[d + BN_LIMBS] = 0;
- }
- // Auxiliary function for bn_multiply
- // Partly reduces res and stores both in x and res
- // Assumes res in normalized and res < 2**519
- // Guarantees x is normalized and partly reduced modulo prime
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- void bn_multiply_reduce(bignum256* x, uint32_t res[2 * BN_LIMBS], const bignum256* prime) {
- for(int i = BN_LIMBS - 1; i >= 0; i--) {
- // res < 2**(256 + 29*i + 31)
- // Proof:
- // if i == LIMBS - 1:
- // res < 2**519
- // == 2**(256 + 29 * 8 + 31)
- // == 2**(256 + 29 * (LIMBS - 1) + 31)
- // else:
- // res < 2 * prime * 2**(29 * (i + 1))
- // <= 2**256 * 2**(29*i + 29) < 2**(256 + 29*i + 31)
- bn_multiply_reduce_step(res, prime, i);
- }
- for(int i = 0; i < BN_LIMBS; i++) {
- x->val[i] = res[i];
- }
- }
- // x = k * x % prime
- // Assumes k, x are normalized, k * x < 2**519
- // Guarantees x is normalized and partly reduced modulo prime
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- void bn_multiply(const bignum256* k, bignum256* x, const bignum256* prime) {
- uint32_t res[2 * BN_LIMBS] = {0};
- bn_multiply_long(k, x, res);
- bn_multiply_reduce(x, res, prime);
- memzero(res, sizeof(res));
- }
- // Partly reduces x modulo prime
- // Assumes limbs of x except the last (the most significant) one are normalized
- // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
- // Guarantees x is normalized and partly reduced modulo prime
- void bn_fast_mod(bignum256* x, const bignum256* prime) {
- // Computes x = x - (x // 2**256) * prime
- // x < 2**((LIMBS - 1) * BITS_PER_LIMB + 32) == 2**264
- // x - (x // 2**256) * prime < 2 * prime
- // Proof:
- // x - (x // 2**256) * prime
- // == x - (x // 2**256) * (2**256 - (2**256 - prime))
- // == x - ((x // 2**256) * 2**256) + (x // 2**256) * (2**256 - prime)
- // == (x % prime) + (x // 2**256) * (2**256 - prime)
- // <= prime - 1 + (2**264 // 2**256) * (2**256 - prime)
- // <= 2**256 + 2**8 * 2**224 == 2**256 + 2**232
- // < 2 * (2**256 - 2**224)
- // <= 2 * prime
- // x - (x // 2**256 - 1) * prime < 2 * prime
- // Proof:
- // x - (x // 2**256) * prime + prime
- // == x - (x // 2**256) * (2**256 - (2**256 - prime)) + prime
- // == x - ((x//2**256) * 2**256) + (x//2**256) * (2**256 - prime) + prime
- // == (x % prime) + (x // 2**256) * (2**256 - prime) + prime
- // <= 2 * prime - 1 + (2**264 // 2**256) * (2**256 - prime)
- // <= 2 * prime + 2**8 * 2**224 == 2**256 + 2**232 + 2**256 - 2**224
- // < 2 * (2**256 - 2**224)
- // <= 2 * prime
- uint32_t coef = x->val[BN_LIMBS - 1] >> (256 - ((BN_LIMBS - 1) * BN_BITS_PER_LIMB));
- // clang-format off
- // coef == x // 2**256
- // 0 <= coef < 2**((LIMBS - 1) * BITS_PER_LIMB + 32 - 256) == 256
- // Proof:
- //* Let x[[a : b] be the number consisting of a-th to (b-1)-th bit of the number x.
- // x[LIMBS - 1] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
- // == x[[(LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]] >> (256 - ((LIMBS - 1) * BITS_PER_LIMB))
- // == x[[256 - ((LIMBS - 1) * BITS_PER_LIMB) + (LIMBS - 1) * BITS_PER_LIMB : (LIMBS - 1) * BITS_PER_LIMB + 32]]
- // == x[[256 : (LIMBS - 1) * BITS_PER_LIMB + 32]]
- // == x[[256 : 264]] == x // 2**256
- // clang-format on
- const int shift = 8;
- uint64_t acc = 1ull << shift;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += (((uint64_t)(BN_BASE - 1)) << shift) + x->val[i] - prime->val[i] * (uint64_t)coef;
- // acc neither overflows 64 bits nor underflows 0
- // Proof:
- // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
- // >= (BASE - 1 << shift) - prime[i] * coef
- // >= 2**shift * (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) * 255
- // == (2**shift - 255) * (2**BITS_PER_LIMB - 1)
- // == (2**8 - 255) * (2**29 - 1) == 2**29 - 1 >= 0
- // acc + (BASE - 1 << shift) + x[i] - prime[i] * coef
- // <= acc + ((BASE - 1) << shift) + x[i]
- // <= (2**(64 - BITS_PER_LIMB) - 1) + 2**shift * (2**BITS_PER_LIMB - 1)
- // + (2**32 - 1)
- // == (2**35 - 1) + 2**8 * (2**29 - 1) + 2**32
- // < 2**35 + 2**37 + 2**32 < 2**64
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 2**(64 - BITS_PER_LIMB) - 1 == 2**35 - 1
- // acc == (1 << BITS_PER_LIMB * (i + 1) + shift) + x[:i + 1]
- // - coef * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 1 << shift);
- // clang-format off
- // acc == 1 << shift
- // Proof:
- // acc
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + (x - coef * prime) >> BITS_PER_LIMB * LIMBS
- // <= (1 << BITS_PER_LIMB * LIMBS + shift) + (2 * prime) >> BITS_PER_LIMB * LIMBS
- // <= (1 << BITS_PER_LIMB * LIMBS + shift) + 2 * 2**256 >> BITS_PER_LIMB * LIMBS
- // <= 2**269 + 2**257 >> 2**261
- // <= 1 << 8 == 1 << shift
- // acc
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + x[:LIMBS] - coef * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // >= (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
- // == (1 << BITS_PER_LIMB * LIMBS + shift) + 0 >> BITS_PER_LIMB * LIMBS
- // <= 1 << 8 == 1 << shift
- // clang-format on
- }
- // res = x**e % prime
- // Assumes both x and e are normalized, x < 2**259
- // Guarantees res is normalized and partly reduced modulo prime
- // Works properly even if &x == &res
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to e
- void bn_power_mod(const bignum256* x, const bignum256* e, const bignum256* prime, bignum256* res) {
- // Uses iterative right-to-left exponentiation by squaring, see
- // https://en.wikipedia.org/wiki/Modular_exponentiation#Right-to-left_binary_method
- bignum256 acc = {0};
- bn_copy(x, &acc);
- bn_one(res);
- for(int i = 0; i < BN_LIMBS; i++) {
- uint32_t limb = e->val[i];
- for(int j = 0; j < BN_BITS_PER_LIMB; j++) {
- // Break if the following bits of the last limb are zero
- if(i == BN_LIMBS - 1 && limb == 0) break;
- if(limb & 1)
- // acc * res < 2**519
- // Proof:
- // acc * res <= max(2**259 - 1, 2 * prime) * (2 * prime)
- // == max(2**259 - 1, 2**257) * 2**257 < 2**259 * 2**257
- // == 2**516 < 2**519
- bn_multiply(&acc, res, prime);
- limb >>= 1;
- // acc * acc < 2**519
- // Proof:
- // acc * acc <= max(2**259 - 1, 2 * prime)**2
- // <= (2**259)**2 == 2**518 < 2**519
- bn_multiply(&acc, &acc, prime);
- }
- // acc == x**(e[:i + 1]) % prime
- }
- memzero(&acc, sizeof(acc));
- }
- // x = sqrt(x) % prime
- // Explicitly x = x**((prime+1)/4) % prime
- // The other root is -sqrt(x)
- // Assumes x is normalized, x < 2**259 and quadratic residuum mod prime
- // Assumes prime is a prime number, prime % 4 == 3, it is normalized and
- // 2**256 - 2**224 <= prime <= 2**256
- // Guarantees x is normalized and fully reduced modulo prime
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to prime
- void bn_sqrt(bignum256* x, const bignum256* prime) {
- // Uses the Lagrange formula for the primes of the special form, see
- // http://en.wikipedia.org/wiki/Quadratic_residue#Prime_or_prime_power_modulus
- // If prime % 4 == 3, then sqrt(x) % prime == x**((prime+1)//4) % prime
- assert(prime->val[BN_LIMBS - 1] % 4 == 3);
- // e = (prime + 1) // 4
- bignum256 e = {0};
- bn_copy(prime, &e);
- bn_addi(&e, 1);
- bn_rshift(&e);
- bn_rshift(&e);
- bn_power_mod(x, &e, prime, x);
- bn_mod(x, prime);
- memzero(&e, sizeof(e));
- }
- // a = 1/a % 2**n
- // Assumes a is odd, 1 <= n <= 32
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to n
- uint32_t inverse_mod_power_two(uint32_t a, uint32_t n) {
- // Uses "Explicit Quadratic Modular inverse modulo 2" from section 3.3 of "On
- // Newton-Raphson iteration for multiplicative inverses modulo prime powers"
- // by Jean-Guillaume Dumas, see
- // https://arxiv.org/pdf/1209.6626.pdf
- // 1/a % 2**n
- // = (2-a) * product([1 + (a-1)**(2**i) for i in range(1, floor(log2(n)))])
- uint32_t acc = 2 - a;
- uint32_t f = a - 1;
- // mask = (1 << n) - 1
- uint32_t mask = n == 32 ? 0xFFFFFFFF : (1u << n) - 1;
- for(uint32_t i = 1; i < n; i <<= 1) {
- f = (f * f) & mask;
- acc = (acc * (1 + f)) & mask;
- }
- return acc;
- }
- // x = (x / 2**BITS_PER_LIMB) % prime
- // Assumes both x and prime are normalized
- // Assumes prime is an odd number and normalized
- // Guarantees x is normalized
- // If x is partly reduced (fully reduced) modulo prime,
- // guarantess x will be partly reduced (fully reduced) modulo prime
- void bn_divide_base(bignum256* x, const bignum256* prime) {
- // Uses an explicit formula for the modular inverse of power of two
- // (x / 2**n) % prime == (x + ((-x / prime) % 2**n) * prime) // 2**n
- // Proof:
- // (x + ((-x / prime) % 2**n) * prime) % 2**n
- // == (x - x / prime * prime) % 2**n
- // == 0
- // (x + ((-1 / prime) % 2**n) * prime) % prime
- // == x
- // if x < prime:
- // (x + ((-x / prime) % 2**n) * prime) // 2**n
- // <= ((prime - 1) + (2**n - 1) * prime) / 2**n
- // == (2**n * prime - 1) / 2**n == prime - 1 / 2**n < prime
- // if x < 2 * prime:
- // (x + ((-x / prime) % 2**n) * prime) // 2**n
- // <= ((2 * prime - 1) + (2**n - 1) * prime) / 2**n
- // == (2**n * prime + prime - 1) / 2**n
- // == prime + (prime - 1) / 2**n < 2 * prime
- // m = (-x / prime) % 2**BITS_PER_LIMB
- uint32_t m = (x->val[0] * (BN_BASE - inverse_mod_power_two(prime->val[0], BN_BITS_PER_LIMB))) &
- BN_LIMB_MASK;
- // m < 2**BITS_PER_LIMB
- uint64_t acc = x->val[0] + (uint64_t)m * prime->val[0];
- acc >>= BN_BITS_PER_LIMB;
- for(int i = 1; i < BN_LIMBS; i++) {
- acc = acc + x->val[i] + (uint64_t)m * prime->val[i];
- // acc does not overflow 64 bits
- // acc == acc + x + m * prime
- // <= 2**(64 - BITS_PER_LIMB) + 2**(BITS_PER_LIMB)
- // 2**(BITS_PER_LIMB) * 2**(BITS_PER_LIMB)
- // <= 2**(2 * BITS_PER_LIMB) + 2**(64 - BITS_PER_LIMB) +
- // 2**(BITS_PER_LIMB)
- // <= 2**58 + 2**35 + 2**29 < 2**64
- x->val[i - 1] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc < 2**35 == 2**(64 - BITS_PER_LIMB)
- // acc == x[:i + 1] + m * prime[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- x->val[BN_LIMBS - 1] = acc;
- assert(acc >> BN_BITS_PER_LIMB == 0);
- // clang-format off
- // acc >> BITS_PER_LIMB == 0
- // Proof:
- // acc >> BITS_PER_LIMB
- // == (x[:LIMB] + m * prime[:LIMB] >> BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * (LIMBS + 1)
- // == x + m * prime >> BITS_PER_LIMB * (LIMBS + 1)
- // <= (2**(BITS_PER_LIMB * LIMBS) - 1) + (2**BITS_PER_LIMB - 1) * (2**(BITS_PER_LIMB * LIMBS) - 1) >> BITS_PER_LIMB * (LIMBS + 1)
- // == 2**(BITS_PER_LIMB * LIMBS) - 1 + 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**(BITS_PER_LIMB * LIMBS) - 2**BITS_PER_LIMB + 1 >> BITS_PER_LIMB * (LIMBS + 1)
- // == 2**(BITS_PER_LIMB * (LIMBS + 1)) - 2**BITS_PER_LIMB >> BITS_PER_LIMB * (LIMBS + 1)
- // == 0
- // clang-format on
- }
- #if !USE_INVERSE_FAST
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is normalized
- // Assumes prime is a prime number
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to prime
- static void bn_inverse_slow(bignum256* x, const bignum256* prime) {
- // Uses formula 1/x % prime == x**(prime - 2) % prime
- // See https://en.wikipedia.org/wiki/Fermat%27s_little_theorem
- bn_fast_mod(x, prime);
- // e = prime - 2
- bignum256 e = {0};
- bn_read_uint32(2, &e);
- bn_subtract(prime, &e, &e);
- bn_power_mod(x, &e, prime, x);
- bn_mod(x, prime);
- memzero(&e, sizeof(e));
- }
- #endif
- #if false
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is is_normalized
- // Assumes GCD(x, prime) = 1
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to prime and x
- static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
- // "The Almost Montgomery Inverse" from the section 3 of "Constant Time
- // Modular Inversion" by Joppe W. Bos
- // See http://www.joppebos.com/files/CTInversion.pdf
- /*
- u = prime
- v = x & prime
- s = 1
- r = 0
- k = 0
- while v != 1:
- k += 1
- if is_even(u):
- u = u // 2
- s = 2 * s
- elif is_even(v):
- v = v // 2
- r = 2 * r
- elif v < u:
- u = (u - v) // 2
- r = r + s
- s = 2 * s
- else:
- v = (v - u) // 2
- s = r + s
- r = 2 * r
- s = (s / 2**k) % prime
- return s
- */
- if (bn_is_zero(x)) return;
- bn_fast_mod(x, prime);
- bn_mod(x, prime);
- bignum256 u = {0}, v = {0}, r = {0}, s = {0};
- bn_copy(prime, &u);
- bn_copy(x, &v);
- bn_one(&s);
- bn_zero(&r);
- int k = 0;
- while (!bn_is_one(&v)) {
- if ((u.val[0] & 1) == 0) {
- bn_rshift(&u);
- bn_lshift(&s);
- } else if ((v.val[0] & 1) == 0) {
- bn_rshift(&v);
- bn_lshift(&r);
- } else if (bn_is_less(&v, &u)) {
- bn_subtract(&u, &v, &u);
- bn_rshift(&u);
- bn_add(&r, &s);
- bn_lshift(&s);
- } else {
- bn_subtract(&v, &u, &v);
- bn_rshift(&v);
- bn_add(&s, &r);
- bn_lshift(&r);
- }
- k += 1;
- assert(!bn_is_zero(&v)); // assert GCD(x, prime) == 1
- }
- // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
- for (int i = 0; i < k / BITS_PER_LIMB; i++) {
- bn_divide_base(&s, prime);
- }
- // s = s / 2**(k % BITS_PER_LIMB)
- for (int i = 0; i < k % BN_BITS_PER_LIMB; i++) {
- bn_mult_half(&s, prime);
- }
- bn_copy(&s, x);
- memzero(&u, sizeof(u));
- memzero(&v, sizeof(v));
- memzero(&r, sizeof(r));
- memzero(&s, sizeof(s));
- }
- #endif
- #if USE_INVERSE_FAST
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is is_normalized
- // Assumes GCD(x, prime) = 1
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
- // The function has constant control flow but not constant memory access flow
- // with regard to prime and x
- static void bn_inverse_fast(bignum256* x, const bignum256* prime) {
- // Custom constant time version of "The Almost Montgomery Inverse" from the
- // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
- // See http://www.joppebos.com/files/CTInversion.pdf
- /*
- u = prime
- v = x % prime
- s = 1
- r = 0
- k = 0
- while v != 1:
- k += 1
- if is_even(u): # b1
- u = u // 2
- s = 2 * s
- elif is_even(v): # b2
- v = v // 2
- r = 2 * r
- elif v < u: # b3
- u = (u - v) // 2
- r = r + s
- s = 2 * s
- else: # b4
- v = (v - u) // 2
- s = r + s
- r = 2 * r
- s = (s / 2**k) % prime
- return s
- */
- bn_fast_mod(x, prime);
- bn_mod(x, prime);
- bignum256 u = {0}, v = {0}, r = {0}, s = {0};
- bn_copy(prime, &u);
- bn_copy(x, &v);
- bn_one(&s);
- bn_zero(&r);
- bignum256 zero = {0};
- bn_zero(&zero);
- int k = 0;
- int finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0, b3 = 0, b4 = 0;
- finished = 0;
- for(int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
- finished = finished | -bn_is_one(&v);
- u_even = -bn_is_even(&u);
- v_even = -bn_is_even(&v);
- v_less_u = -bn_is_less(&v, &u);
- b1 = ~finished & u_even;
- b2 = ~finished & ~b1 & v_even;
- b3 = ~finished & ~b1 & ~b2 & v_less_u;
- b4 = ~finished & ~b1 & ~b2 & ~b3;
- // The ternary operator for pointers with constant control flow
- // BN_INVERSE_FAST_TERNARY(c, t, f) = t if c else f
- // Very nasty hack, sorry for that
- #define BN_INVERSE_FAST_TERNARY(c, t, f) \
- ((void*)(((c) & (uintptr_t)(t)) | (~(c) & (uintptr_t)(f))))
- bn_subtract(
- BN_INVERSE_FAST_TERNARY(b3, &u, &v),
- BN_INVERSE_FAST_TERNARY(b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &v, &u), &zero),
- BN_INVERSE_FAST_TERNARY(b3, &u, &v));
- bn_add(
- BN_INVERSE_FAST_TERNARY(b3, &r, &s),
- BN_INVERSE_FAST_TERNARY(b3 | b4, BN_INVERSE_FAST_TERNARY(b3, &s, &r), &zero));
- bn_rshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &u, &v));
- bn_lshift(BN_INVERSE_FAST_TERNARY(b1 | b3, &s, &r));
- k = k - ~finished;
- }
- // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
- for(int i = 0; i < 2 * BN_LIMBS; i++) {
- // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_divide_base(&r, prime);
- bn_cmov(&s, i < k / BN_BITS_PER_LIMB, &r, &s);
- }
- // s = s / 2**(k % BITS_PER_LIMB)
- for(int i = 0; i < BN_BITS_PER_LIMB; i++) {
- // s = s / 2 % prime if i < k % BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_mult_half(&r, prime);
- bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
- }
- bn_cmov(x, bn_is_zero(x), x, &s);
- memzero(&u, sizeof(u));
- memzero(&v, sizeof(v));
- memzero(&r, sizeof(s));
- memzero(&s, sizeof(s));
- }
- #endif
- #if false
- // x = 1/x % prime if x != 0 else 0
- // Assumes x is is_normalized
- // Assumes GCD(x, prime) = 1
- // Guarantees x is normalized and fully reduced modulo prime
- // Assumes prime is odd, normalized, 2**256 - 2**224 <= prime <= 2**256
- static void bn_inverse_fast(bignum256 *x, const bignum256 *prime) {
- // Custom constant time version of "The Almost Montgomery Inverse" from the
- // section 3 of "Constant Time Modular Inversion" by Joppe W. Bos
- // See http://www.joppebos.com/files/CTInversion.pdf
- /*
- u = prime
- v = x % prime
- s = 1
- r = 0
- k = 0
- while v != 1:
- k += 1
- if is_even(u): # b1
- u = u // 2
- s = 2 * s
- elif is_even(v): # b2
- v = v // 2
- r = 2 * r
- elif v < u: # b3
- u = (u - v) // 2
- r = r + s
- s = 2 * s
- else: # b4
- v = (v - u) // 2
- s = r + s
- r = 2 * r
- s = (s / 2**k) % prime
- return s
- */
- bn_fast_mod(x, prime);
- bn_mod(x, prime);
- bignum256 u = {0}, v = {0}, r = {0}, s = {0};
- bn_copy(prime, &u);
- bn_copy(x, &v);
- bn_one(&s);
- bn_zero(&r);
- bignum256 zero = {0};
- bn_zero(&zero);
- int k = 0;
- uint32_t finished = 0, u_even = 0, v_even = 0, v_less_u = 0, b1 = 0, b2 = 0,
- b3 = 0, b4 = 0;
- finished = 0;
- bignum256 u_half = {0}, v_half = {0}, u_minus_v_half = {0}, v_minus_u_half = {0}, r_plus_s = {0}, r_twice = {0}, s_twice = {0};
- for (int i = 0; i < 2 * BN_LIMBS * BN_BITS_PER_LIMB; i++) {
- finished = finished | bn_is_one(&v);
- u_even = bn_is_even(&u);
- v_even = bn_is_even(&v);
- v_less_u = bn_is_less(&v, &u);
- b1 = (finished ^ 1) & u_even;
- b2 = (finished ^ 1) & (b1 ^ 1) & v_even;
- b3 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & v_less_u;
- b4 = (finished ^ 1) & (b1 ^ 1) & (b2 ^ 1) & (b3 ^ 1);
- // u_half = u // 2
- bn_copy(&u, &u_half);
- bn_rshift(&u_half);
- // v_half = v // 2
- bn_copy(&v, &v_half);
- bn_rshift(&v_half);
- // u_minus_v_half = (u - v) // 2
- bn_subtract(&u, &v, &u_minus_v_half);
- bn_rshift(&u_minus_v_half);
- // v_minus_u_half = (v - u) // 2
- bn_subtract(&v, &u, &v_minus_u_half);
- bn_rshift(&v_minus_u_half);
- // r_plus_s = r + s
- bn_copy(&r, &r_plus_s);
- bn_add(&r_plus_s, &s);
- // r_twice = 2 * r
- bn_copy(&r, &r_twice);
- bn_lshift(&r_twice);
- // s_twice = 2 * s
- bn_copy(&s, &s_twice);
- bn_lshift(&s_twice);
- bn_cmov(&u, b1, &u_half, &u);
- bn_cmov(&u, b3, &u_minus_v_half, &u);
- bn_cmov(&v, b2, &v_half, &v);
- bn_cmov(&v, b4, &v_minus_u_half, &v);
- bn_cmov(&r, b2 | b4, &r_twice, &r);
- bn_cmov(&r, b3, &r_plus_s, &r);
- bn_cmov(&s, b1 | b3, &s_twice, &s);
- bn_cmov(&s, b4, &r_plus_s, &s);
- k = k + (finished ^ 1);
- }
- // s = s / 2**(k // BITS_PER_LIMB * BITS_PER_LIMB)
- for (int i = 0; i < 2 * BN_LIMBS; i++) {
- // s = s / 2**BITS_PER_LIMB % prime if i < k // BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_divide_base(&r, prime);
- bn_cmov(&s, i < k / BITS_PER_LIMB, &r, &s);
- }
- // s = s / 2**(k % BITS_PER_LIMB)
- for (int i = 0; i < BN_BITS_PER_LIMB; i++) {
- // s = s / 2 % prime if i < k % BITS_PER_LIMB else s
- bn_copy(&s, &r);
- bn_mult_half(&r, prime);
- bn_cmov(&s, i < k % BN_BITS_PER_LIMB, &r, &s);
- }
- bn_cmov(x, bn_is_zero(x), x, &s);
- memzero(&u, sizeof(u));
- memzero(&v, sizeof(v));
- memzero(&r, sizeof(r));
- memzero(&s, sizeof(s));
- memzero(&u_half, sizeof(u_half));
- memzero(&v_half, sizeof(v_half));
- memzero(&u_minus_v_half, sizeof(u_minus_v_half));
- memzero(&v_minus_u_half, sizeof(v_minus_u_half));
- memzero(&r_twice, sizeof(r_twice));
- memzero(&s_twice, sizeof(s_twice));
- memzero(&r_plus_s, sizeof(r_plus_s));
- }
- #endif
- // Normalizes x
- // Assumes x < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
- // Guarantees x is normalized
- void bn_normalize(bignum256* x) {
- uint32_t acc = 0;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // acc + x[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
- // == 7 + 2**29 - 1 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= (BN_BITS_PER_LIMB);
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- }
- }
- // x = x + y
- // Assumes x, y are normalized, x + y < 2**(LIMBS*BITS_PER_LIMB) == 2**261
- // Guarantees x is normalized
- // Works properly even if &x == &y
- void bn_add(bignum256* x, const bignum256* y) {
- uint32_t acc = 0;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i] + y->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // acc + x[i] + y[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
- // == (2**(32 - BITS_PER_LIMB) - 1) + 2**(BITS_PER_LIMB + 1) - 2
- // == 7 + 2**30 - 2 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == x[:i + 1] + y[:i + 1] >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 0); // assert x + y < 2**261
- // acc == 0
- // Proof:
- // acc == x[:LIMBS] + y[:LIMBS] >> LIMBS * BITS_PER_LIMB
- // == x + y >> LIMBS * BITS_PER_LIMB
- // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> LIMBS * BITS_PER_LIMB == 0
- }
- // x = x + y % prime
- // Assumes x, y are normalized
- // Guarantees x is normalized and partly reduced modulo prime
- // Assumes prime is normalized and 2^256 - 2^224 <= prime <= 2^256
- void bn_addmod(bignum256* x, const bignum256* y, const bignum256* prime) {
- for(int i = 0; i < BN_LIMBS; i++) {
- x->val[i] += y->val[i];
- // x[i] doesn't overflow 32 bits
- // Proof:
- // x[i] + y[i]
- // <= 2 * (2**BITS_PER_LIMB - 1)
- // == 2**30 - 2 < 2**32
- }
- bn_fast_mod(x, prime);
- }
- // x = x + y
- // Assumes x is normalized
- // Assumes y <= 2**32 - 2**29 == 2**32 - 2**BITS_PER_LIMB and
- // x + y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
- // Guarantees x is normalized
- void bn_addi(bignum256* x, uint32_t y) {
- // assert(y <= 3758096384); // assert y <= 2**32 - 2**29
- uint32_t acc = y;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // if i == 0:
- // acc + x[i] == y + x[0]
- // <= (2**32 - 2**BITS_PER_LIMB) + (2**BITS_PER_LIMB - 1)
- // == 2**32 - 1 < 2**32
- // else:
- // acc + x[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1)
- // == 7 + 2**29 - 1 < 2**32
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= (BN_BITS_PER_LIMB);
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == x[:i + 1] + y >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 0); // assert x + y < 2**261
- // acc == 0
- // Proof:
- // acc == x[:LIMBS] + y << LIMBS * BITS_PER_LIMB
- // == x + y << LIMBS * BITS_PER_LIMB
- // <= 2**(LIMBS + BITS_PER_LIMB) - 1 << LIMBS * BITS_PER_LIMB
- // == 0
- }
- // x = x - y % prime
- // Explicitly x = x + prime - y
- // Assumes x, y are normalized
- // Assumes y < prime[0], x + prime - y < 2**261 == 2**(LIMBS * BITS_PER_LIMB)
- // Guarantees x is normalized
- // If x is fully reduced modulo prime,
- // guarantess x will be partly reduced modulo prime
- // Assumes prime is nonzero and normalized
- void bn_subi(bignum256* x, uint32_t y, const bignum256* prime) {
- assert(y < prime->val[0]);
- // x = x + prime - y
- uint32_t acc = -y;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += x->val[i] + prime->val[i];
- // acc neither overflows 32 bits nor underflows 0
- // Proof:
- // acc + x[i] + prime[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 2 * (2**BITS_PER_LIMB - 1)
- // <= 7 + 2**30 - 2 < 2**32
- // acc + x[i] + prime[i]
- // >= -y + prime[0] >= 0
- x->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == x[:i + 1] + prime[:i + 1] - y >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 0); // assert x + prime - y < 2**261
- // acc == 0
- // Proof:
- // acc == x[:LIMBS] + prime[:LIMBS] - y >> BITS_PER_LIMB * LIMBS
- // == x + prime - y >> BITS_PER_LIMB * LIMBS
- // <= 2**(LIMBS * BITS_PER_LIMB) - 1 >> BITS_PER_LIMB * LIMBS == 0
- }
- // res = x - y % prime
- // Explicitly res = x + (2 * prime - y)
- // Assumes x, y are normalized, y is partly reduced
- // Assumes x + 2 * prime - y < 2**261 == 2**(BITS_PER_LIMB * LIMBS)
- // Guarantees res is normalized
- // Assumes prime is nonzero and normalized
- void bn_subtractmod(const bignum256* x, const bignum256* y, bignum256* res, const bignum256* prime) {
- // res = x + (2 * prime - y)
- uint32_t acc = 1;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += (BN_BASE - 1) + x->val[i] + 2 * prime->val[i] - y->val[i];
- // acc neither overflows 32 bits nor underflows 0
- // Proof:
- // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
- // >= (BASE - 1) - y[i]
- // == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1) == 0
- // acc + (BASE - 1) + x[i] + 2 * prime[i] - y[i]
- // <= acc + (BASE - 1) + x[i] + 2 * prime[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
- // (2**BITS_PER_LIMB - 1) + 2 * (2**BITS_PER_LIMB - 1)
- // <= (2**(32 - BITS_PER_LIMB) - 1) + 4 * (2**BITS_PER_LIMB - 1)
- // == 7 + 4 * 2**29 - 4 == 2**31 + 3 < 2**32
- res->val[i] = acc & (BN_BASE - 1);
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i+1] - y[:i+1] + 2*prime[:i+1]
- // >> BITS_PER_LIMB * (i+1)
- }
- // assert(acc == 1); // assert x + 2 * prime - y < 2**261
- // clang-format off
- // acc == 1
- // Proof:
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] + 2 * prime[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y + 2 * prime >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x + (2 * prime - y) >> BITS_PER_LIMB * LIMBS
- // >= 2**(BITS_PER_LIMB * LIMBS) + 0 + 1 >> BITS_PER_LIMB * LIMBS
- // == 1
- // clang-format on
- }
- // res = x - y
- // Assumes x, y are normalized and x >= y
- // Guarantees res is normalized
- // Works properly even if &x == &y or &x == &res or &y == &res or
- // &x == &y == &res
- void bn_subtract(const bignum256* x, const bignum256* y, bignum256* res) {
- uint32_t acc = 1;
- for(int i = 0; i < BN_LIMBS; i++) {
- acc += (BN_BASE - 1) + x->val[i] - y->val[i];
- // acc neither overflows 32 bits nor underflows 0
- // Proof:
- // acc + (BASE - 1) + x[i] - y[i]
- // >= (BASE - 1) - y == (2**BITS_PER_LIMB - 1) - (2**BITS_PER_LIMB - 1)
- // == 0
- // acc + (BASE - 1) + x[i] - y[i]
- // <= acc + (BASE - 1) + x[i]
- // <= (2**(32 - BITS_PER_LIMB) - 1) + (2**BITS_PER_LIMB - 1) +
- // (2**BITS_PER_LIMB - 1)
- // == 7 + 2 * 2**29 < 2 **32
- res->val[i] = acc & BN_LIMB_MASK;
- acc >>= BN_BITS_PER_LIMB;
- // acc <= 7 == 2**(32 - BITS_PER_LIMB) - 1
- // acc == 2**(BITS_PER_LIMB * (i + 1)) + x[:i + 1] - y[:i + 1]
- // >> BITS_PER_LIMB * (i + 1)
- }
- // assert(acc == 1); // assert x >= y
- // clang-format off
- // acc == 1
- // Proof:
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x >> BITS_PER_LIMB * LIMBS
- // <= 2**(BITS_PER_LIMB * LIMBS) + 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // <= 2 * 2**(BITS_PER_LIMB * LIMBS) - 1 >> BITS_PER_LIMB * LIMBS
- // == 1
- // acc == 2**(BITS_PER_LIMB * LIMBS) + x[:LIMBS] - y[:LIMBS] >> BITS_PER_LIMB * LIMBS
- // == 2**(BITS_PER_LIMB * LIMBS) + x - y >> BITS_PER_LIMB * LIMBS
- // >= 2**(BITS_PER_LIMB * LIMBS) >> BITS_PER_LIMB * LIMBS
- // == 1
- }
- // q = x // d, r = x % d
- // Assumes x is normalized, 1 <= d <= 61304
- // Guarantees q is normalized
- void bn_long_division(bignum256 *x, uint32_t d, bignum256 *q, uint32_t *r) {
- assert(1 <= d && d < 61304);
- uint32_t acc = 0;
- *r = x->val[BN_LIMBS - 1] % d;
- q->val[BN_LIMBS - 1] = x->val[BN_LIMBS - 1] / d;
- for (int i = BN_LIMBS - 2; i >= 0; i--) {
- acc = *r * (BN_BASE % d) + x->val[i];
- // acc doesn't overflow 32 bits
- // Proof:
- // r * (BASE % d) + x[i]
- // <= (d - 1) * (d - 1) + (2**BITS_PER_LIMB - 1)
- // == d**2 - 2*d + 2**BITS_PER_LIMB
- // == 61304**2 - 2 * 61304 + 2**29
- // == 3758057808 + 2**29 < 2**32
- q->val[i] = *r * (BN_BASE / d) + (acc / d);
- // q[i] doesn't overflow 32 bits
- // Proof:
- // r * (BASE // d) + (acc // d)
- // <= (d - 1) * (2**BITS_PER_LIMB / d) +
- // ((d**2 - 2*d + 2**BITS_PER_LIMB) / d)
- // <= (d - 1) * (2**BITS_PER_LIMB / d) + (d - 2 + 2**BITS_PER_LIMB / d)
- // == (d - 1 + 1) * (2**BITS_PER_LIMB / d) + d - 2
- // == 2**BITS_PER_LIMB + d - 2 <= 2**29 + 61304 < 2**32
- // q[i] == (r * BASE + x[i]) // d
- // Proof:
- // q[i] == r * (BASE // d) + (acc // d)
- // == r * (BASE // d) + (r * (BASE % d) + x[i]) // d
- // == (r * d * (BASE // d) + r * (BASE % d) + x[i]) // d
- // == (r * (d * (BASE // d) + (BASE % d)) + x[i]) // d
- // == (r * BASE + x[i]) // d
- // q[i] < 2**BITS_PER_LIMB
- // Proof:
- // q[i] == (r * BASE + x[i]) // d
- // <= ((d - 1) * 2**BITS_PER_LIMB + (2**BITS_PER_LIMB - 1)) / d
- // == (d * 2**BITS_PER_LIMB - 1) / d == 2**BITS_PER_LIMB - 1 / d
- // < 2**BITS_PER_LIMB
- *r = acc % d;
- // r == (r * BASE + x[i]) % d
- // Proof:
- // r == acc % d == (r * (BASE % d) + x[i]) % d
- // == (r * BASE + x[i]) % d
- // x[:i] == q[:i] * d + r
- }
- }
- // x = x // 58, r = x % 58
- // Assumes x is normalized
- // Guarantees x is normalized
- void bn_divmod58(bignum256 *x, uint32_t *r) { bn_long_division(x, 58, x, r); }
- // x = x // 1000, r = x % 1000
- // Assumes x is normalized
- // Guarantees x is normalized
- void bn_divmod1000(bignum256 *x, uint32_t *r) {
- bn_long_division(x, 1000, x, r);
- }
- // x = x // 10, r = x % 10
- // Assumes x is normalized
- // Guarantees x is normalized
- void bn_divmod10(bignum256 *x, uint32_t *r) { bn_long_division(x, 10, x, r); }
- // Formats amount
- // Assumes amount is normalized
- // Assumes prefix and suffix are null-terminated strings
- // Assumes output is an array of length output_length
- // The function doesn't have neither constant control flow nor constant memory
- // access flow with regard to any its argument
- size_t bn_format(const bignum256 *amount, const char *prefix, const char *suffix, unsigned int decimals, int exponent, bool trailing, char thousands, char *output, size_t output_length) {
- /*
- Python prototype of the function:
- def format(amount, prefix, suffix, decimals, exponent, trailing, thousands):
- if exponent >= 0:
- amount *= 10**exponent
- else:
- amount //= 10 ** (-exponent)
- d = pow(10, decimals)
- integer_part = amount // d
- integer_str = f"{integer_part:,}".replace(",", thousands or "")
- if decimals:
- decimal_part = amount % d
- decimal_str = f".{decimal_part:0{decimals}d}"
- if not trailing:
- decimal_str = decimal_str.rstrip("0").rstrip(".")
- else:
- decimal_str = ""
- return prefix + integer_str + decimal_str + suffix
- */
- // Auxiliary macro for bn_format
- // If enough space adds one character to output starting from the end
- #define BN_FORMAT_ADD_OUTPUT_CHAR(c) \
- { \
- --position; \
- if (output <= position && position < output + output_length) { \
- *position = (c); \
- } else { \
- memset(output, '\0', output_length); \
- return 0; \
- } \
- }
- bignum256 temp = {0};
- bn_copy(amount, &temp);
- uint32_t digit = 0;
- char *position = output + output_length;
- // Add string ending character
- BN_FORMAT_ADD_OUTPUT_CHAR('\0');
- // Add suffix
- size_t suffix_length = suffix ? strlen(suffix) : 0;
- for (int i = suffix_length - 1; i >= 0; --i)
- BN_FORMAT_ADD_OUTPUT_CHAR(suffix[i])
- // amount //= 10**exponent
- for (; exponent < 0; ++exponent) {
- // if temp == 0, there is no need to divide it by 10 anymore
- if (bn_is_zero(&temp)) {
- exponent = 0;
- break;
- }
- bn_divmod10(&temp, &digit);
- }
- // exponent >= 0 && decimals >= 0
- bool fractional_part = false; // is fractional-part of amount present
- { // Add fractional-part digits of amount
- // Add trailing zeroes
- unsigned int trailing_zeros = decimals < (unsigned int) exponent ? decimals : (unsigned int) exponent;
- // When casting a negative int to unsigned int, UINT_MAX is added to the int before
- // Since exponent >= 0, the value remains unchanged
- decimals -= trailing_zeros;
- exponent -= trailing_zeros;
- if (trailing && trailing_zeros) {
- fractional_part = true;
- for (; trailing_zeros > 0; --trailing_zeros)
- BN_FORMAT_ADD_OUTPUT_CHAR('0')
- }
- // exponent == 0 || decimals == 0
- // Add significant digits and leading zeroes
- for (; decimals > 0; --decimals) {
- bn_divmod10(&temp, &digit);
- if (fractional_part || digit || trailing) {
- fractional_part = true;
- BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
- }
- else if (bn_is_zero(&temp)) {
- // We break since the remaining digits are zeroes and fractional_part == trailing == false
- decimals = 0;
- break;
- }
- }
- // decimals == 0
- }
- if (fractional_part) {
- BN_FORMAT_ADD_OUTPUT_CHAR('.')
- }
- { // Add integer-part digits of amount
- // Add trailing zeroes
- int digits = 0;
- if (!bn_is_zero(&temp)) {
- for (; exponent > 0; --exponent) {
- ++digits;
- BN_FORMAT_ADD_OUTPUT_CHAR('0')
- if (thousands != 0 && digits % 3 == 0) {
- BN_FORMAT_ADD_OUTPUT_CHAR(thousands)
- }
- }
- }
- // decimals == 0 && exponent == 0
- // Add significant digits
- bool is_zero = false;
- do {
- ++digits;
- bn_divmod10(&temp, &digit);
- is_zero = bn_is_zero(&temp);
- BN_FORMAT_ADD_OUTPUT_CHAR('0' + digit)
- if (thousands != 0 && !is_zero && digits % 3 == 0) {
- BN_FORMAT_ADD_OUTPUT_CHAR(thousands)
- }
- } while (!is_zero);
- }
- // Add prefix
- size_t prefix_length = prefix ? strlen(prefix) : 0;
- for (int i = prefix_length - 1; i >= 0; --i)
- BN_FORMAT_ADD_OUTPUT_CHAR(prefix[i])
- // Move formatted amount to the start of output
- int length = output - position + output_length;
- memmove(output, position, length);
- return length - 1;
- }
- #if USE_BN_PRINT
- // Prints x in hexadecimal
- // Assumes x is normalized and x < 2**256
- void bn_print(const bignum256 *x) {
- printf("%06x", x->val[8]);
- printf("%08x", ((x->val[7] << 3) | (x->val[6] >> 26)));
- printf("%07x", ((x->val[6] << 2) | (x->val[5] >> 27)) & 0x0FFFFFFF);
- printf("%07x", ((x->val[5] << 1) | (x->val[4] >> 28)) & 0x0FFFFFFF);
- printf("%07x", x->val[4] & 0x0FFFFFFF);
- printf("%08x", ((x->val[3] << 3) | (x->val[2] >> 26)));
- printf("%07x", ((x->val[2] << 2) | (x->val[1] >> 27)) & 0x0FFFFFFF);
- printf("%07x", ((x->val[1] << 1) | (x->val[0] >> 28)) & 0x0FFFFFFF);
- printf("%07x", x->val[0] & 0x0FFFFFFF);
- }
- // Prints comma separated list of limbs of x
- void bn_print_raw(const bignum256 *x) {
- for (int i = 0; i < BN_LIMBS - 1; i++) {
- printf("0x%08x, ", x->val[i]);
- }
- printf("0x%08x", x->val[BN_LIMBS - 1]);
- }
- #endif
- #if USE_INVERSE_FAST
- void bn_inverse(bignum256 *x, const bignum256 *prime) {
- bn_inverse_fast(x, prime);
- }
- #else
- void bn_inverse(bignum256 *x, const bignum256 *prime) {
- bn_inverse_slow(x, prime);
- }
- #endif
|