sp_int.c 646 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
54221542315424154251542615427154281542915430154311543215433154341543515436154371543815439154401544115442154431544415445154461544715448154491545015451154521545315454154551545615457154581545915460154611546215463154641546515466154671546815469154701547115472154731547415475154761547715478154791548015481154821548315484154851548615487154881548915490154911549215493154941549515496154971549815499155001550115502155031550415505155061550715508155091551015511155121551315514155151551615517155181551915520155211552215523155241552515526155271552815529155301553115532155331553415535155361553715538155391554015541155421554315544155451554615547155481554915550155511555215553155541555515556155571555815559155601556115562155631556415565155661556715568155691557015571155721557315574155751557615577155781557915580155811558215583155841558515586155871558815589155901559115592155931559415595155961559715598155991560015601156021560315604156051560615607156081560915610156111561215613156141561515616156171561815619156201562115622156231562415625156261562715628156291563015631156321563315634156351563615637156381563915640156411564215643156441564515646156471564815649156501565115652156531565415655156561565715658156591566015661156621566315664156651566615667156681566915670156711567215673156741567515676156771567815679156801568115682156831568415685156861568715688156891569015691156921569315694156951569615697156981569915700157011570215703157041570515706157071570815709157101571115712157131571415715157161571715718157191572015721157221572315724157251572615727157281572915730157311573215733157341573515736157371573815739157401574115742157431574415745157461574715748157491575015751157521575315754157551575615757157581575915760157611576215763157641576515766157671576815769157701577115772157731577415775157761577715778157791578015781157821578315784157851578615787157881578915790157911579215793157941579515796157971579815799158001580115802158031580415805158061580715808158091581015811158121581315814158151581615817158181581915820158211
58221582315824158251582615827158281582915830158311583215833158341583515836158371583815839158401584115842158431584415845158461584715848158491585015851158521585315854158551585615857158581585915860158611586215863158641586515866158671586815869158701587115872158731587415875158761587715878158791588015881158821588315884158851588615887158881588915890158911589215893158941589515896158971589815899159001590115902159031590415905159061590715908159091591015911159121591315914159151591615917159181591915920159211592215923159241592515926159271592815929159301593115932159331593415935159361593715938159391594015941159421594315944159451594615947159481594915950159511595215953159541595515956159571595815959159601596115962159631596415965159661596715968159691597015971159721597315974159751597615977159781597915980159811598215983159841598515986159871598815989159901599115992159931599415995159961599715998159991600016001160021600316004160051600616007160081600916010160111601216013160141601516016160171601816019160201602116022160231602416025160261602716028160291603016031160321603316034160351603616037160381603916040160411604216043160441604516046160471604816049160501605116052160531605416055160561605716058160591606016061160621606316064160651606616067160681606916070160711607216073160741607516076160771607816079160801608116082160831608416085160861608716088160891609016091160921609316094160951609616097160981609916100161011610216103161041610516106161071610816109161101611116112161131611416115161161611716118161191612016121161221612316124161251612616127161281612916130161311613216133161341613516136161371613816139161401614116142161431614416145161461614716148161491615016151161521615316154161551615616157161581615916160161611616216163161641616516166161671616816169161701617116172161731617416175161761617716178161791618016181161821618316184161851618616187161881618916190161911619216193161941619516196161971619816199162001620116202162031620416205162061620716208162091621016211162121621316214162151621616217162181621916220162211
62221622316224162251622616227162281622916230162311623216233162341623516236162371623816239162401624116242162431624416245162461624716248162491625016251162521625316254162551625616257162581625916260162611626216263162641626516266162671626816269162701627116272162731627416275162761627716278162791628016281162821628316284162851628616287162881628916290162911629216293162941629516296162971629816299163001630116302163031630416305163061630716308163091631016311163121631316314163151631616317163181631916320163211632216323163241632516326163271632816329163301633116332163331633416335163361633716338163391634016341163421634316344163451634616347163481634916350163511635216353163541635516356163571635816359163601636116362163631636416365163661636716368163691637016371163721637316374163751637616377163781637916380163811638216383163841638516386163871638816389163901639116392163931639416395163961639716398163991640016401164021640316404164051640616407164081640916410164111641216413164141641516416164171641816419164201642116422164231642416425164261642716428164291643016431164321643316434164351643616437164381643916440164411644216443164441644516446164471644816449164501645116452164531645416455164561645716458164591646016461164621646316464164651646616467164681646916470164711647216473164741647516476164771647816479164801648116482164831648416485164861648716488164891649016491164921649316494164951649616497164981649916500165011650216503165041650516506165071650816509165101651116512165131651416515165161651716518165191652016521165221652316524165251652616527165281652916530165311653216533165341653516536165371653816539165401654116542165431654416545165461654716548165491655016551165521655316554165551655616557165581655916560165611656216563165641656516566165671656816569165701657116572165731657416575165761657716578165791658016581165821658316584165851658616587165881658916590165911659216593165941659516596165971659816599166001660116602166031660416605166061660716608166091661016611166121661316614166151661616617166181661916620166211
66221662316624166251662616627166281662916630166311663216633166341663516636166371663816639166401664116642166431664416645166461664716648166491665016651166521665316654166551665616657166581665916660166611666216663166641666516666166671666816669166701667116672166731667416675166761667716678166791668016681166821668316684166851668616687166881668916690166911669216693166941669516696166971669816699167001670116702167031670416705167061670716708167091671016711167121671316714167151671616717167181671916720167211672216723167241672516726167271672816729167301673116732167331673416735167361673716738167391674016741167421674316744167451674616747167481674916750167511675216753167541675516756167571675816759167601676116762167631676416765167661676716768167691677016771167721677316774167751677616777167781677916780167811678216783167841678516786167871678816789167901679116792167931679416795167961679716798167991680016801168021680316804168051680616807168081680916810168111681216813168141681516816168171681816819168201682116822168231682416825168261682716828168291683016831168321683316834168351683616837168381683916840168411684216843168441684516846168471684816849168501685116852168531685416855168561685716858168591686016861168621686316864168651686616867168681686916870168711687216873168741687516876168771687816879168801688116882168831688416885168861688716888168891689016891168921689316894168951689616897168981689916900169011690216903169041690516906169071690816909169101691116912169131691416915169161691716918169191692016921169221692316924169251692616927169281692916930169311693216933169341693516936169371693816939169401694116942169431694416945169461694716948169491695016951169521695316954169551695616957169581695916960169611696216963169641696516966169671696816969169701697116972169731697416975169761697716978169791698016981169821698316984169851698616987169881698916990169911699216993169941699516996169971699816999170001700117002170031700417005170061700717008170091701017011170121701317014170151701617017170181701917020170211
70221702317024170251702617027170281702917030170311703217033170341703517036170371703817039170401704117042170431704417045170461704717048170491705017051170521705317054170551705617057170581705917060170611706217063170641706517066170671706817069170701707117072170731707417075170761707717078170791708017081170821708317084170851708617087170881708917090170911709217093170941709517096170971709817099171001710117102171031710417105171061710717108171091711017111171121711317114171151711617117171181711917120171211712217123171241712517126171271712817129171301713117132171331713417135171361713717138171391714017141171421714317144171451714617147171481714917150171511715217153171541715517156171571715817159171601716117162171631716417165171661716717168171691717017171171721717317174171751717617177171781717917180171811718217183171841718517186171871718817189171901719117192171931719417195171961719717198171991720017201172021720317204172051720617207172081720917210172111721217213172141721517216172171721817219172201722117222172231722417225172261722717228172291723017231172321723317234172351723617237172381723917240172411724217243172441724517246172471724817249172501725117252172531725417255172561725717258172591726017261172621726317264172651726617267172681726917270172711727217273172741727517276172771727817279172801728117282172831728417285172861728717288172891729017291172921729317294172951729617297172981729917300173011730217303173041730517306173071730817309173101731117312173131731417315173161731717318173191732017321173221732317324173251732617327173281732917330173311733217333173341733517336173371733817339173401734117342173431734417345173461734717348173491735017351173521735317354173551735617357173581735917360173611736217363173641736517366173671736817369173701737117372173731737417375173761737717378173791738017381173821738317384173851738617387173881738917390173911739217393173941739517396173971739817399174001740117402174031740417405174061740717408174091741017411174121741317414174151741617417174181741917420174211
74221742317424174251742617427174281742917430174311743217433174341743517436174371743817439174401744117442174431744417445174461744717448174491745017451174521745317454174551745617457174581745917460174611746217463174641746517466174671746817469174701747117472174731747417475174761747717478174791748017481174821748317484174851748617487174881748917490174911749217493174941749517496174971749817499175001750117502175031750417505175061750717508175091751017511175121751317514175151751617517175181751917520175211752217523175241752517526175271752817529175301753117532175331753417535175361753717538175391754017541175421754317544175451754617547175481754917550175511755217553175541755517556175571755817559175601756117562175631756417565175661756717568175691757017571175721757317574175751757617577175781757917580175811758217583175841758517586175871758817589175901759117592175931759417595175961759717598175991760017601176021760317604176051760617607176081760917610176111761217613176141761517616176171761817619176201762117622176231762417625176261762717628176291763017631176321763317634176351763617637176381763917640176411764217643176441764517646176471764817649176501765117652176531765417655176561765717658176591766017661176621766317664176651766617667176681766917670176711767217673176741767517676176771767817679176801768117682176831768417685176861768717688176891769017691176921769317694176951769617697176981769917700177011770217703177041770517706177071770817709177101771117712177131771417715177161771717718177191772017721177221772317724177251772617727177281772917730177311773217733177341773517736177371773817739177401774117742177431774417745177461774717748177491775017751177521775317754177551775617757177581775917760177611776217763177641776517766177671776817769177701777117772177731777417775177761777717778177791778017781177821778317784177851778617787177881778917790177911779217793177941779517796177971779817799178001780117802178031780417805178061780717808178091781017811178121781317814178151781617817178181781917820178211
78221782317824178251782617827178281782917830178311783217833178341783517836178371783817839178401784117842178431784417845178461784717848178491785017851178521785317854178551785617857178581785917860178611786217863178641786517866178671786817869178701787117872178731787417875178761787717878178791788017881178821788317884178851788617887178881788917890178911789217893178941789517896178971789817899179001790117902179031790417905179061790717908179091791017911179121791317914179151791617917179181791917920179211792217923179241792517926179271792817929179301793117932179331793417935179361793717938179391794017941179421794317944179451794617947179481794917950179511795217953179541795517956179571795817959179601796117962179631796417965179661796717968179691797017971179721797317974179751797617977179781797917980179811798217983179841798517986179871798817989179901799117992179931799417995179961799717998179991800018001180021800318004180051800618007180081800918010180111801218013180141801518016180171801818019180201802118022180231802418025180261802718028180291803018031180321803318034180351803618037180381803918040180411804218043180441804518046180471804818049180501805118052180531805418055180561805718058180591806018061180621806318064180651806618067180681806918070180711807218073180741807518076180771807818079180801808118082180831808418085180861808718088180891809018091180921809318094180951809618097180981809918100181011810218103181041810518106181071810818109181101811118112181131811418115181161811718118181191812018121181221812318124181251812618127181281812918130181311813218133181341813518136181371813818139181401814118142181431814418145181461814718148181491815018151181521815318154181551815618157181581815918160181611816218163181641816518166181671816818169181701817118172181731817418175181761817718178181791818018181181821818318184181851818618187181881818918190181911819218193181941819518196181971819818199182001820118202182031820418205182061820718208182091821018211182121821318214182151821618217182181821918220182211
82221822318224182251822618227182281822918230182311823218233182341823518236182371823818239182401824118242182431824418245182461824718248182491825018251182521825318254182551825618257182581825918260182611826218263182641826518266182671826818269182701827118272182731827418275182761827718278182791828018281182821828318284182851828618287182881828918290182911829218293182941829518296182971829818299183001830118302183031830418305183061830718308183091831018311183121831318314183151831618317183181831918320183211832218323183241832518326183271832818329183301833118332183331833418335183361833718338183391834018341183421834318344183451834618347183481834918350183511835218353183541835518356183571835818359183601836118362183631836418365183661836718368183691837018371183721837318374183751837618377183781837918380183811838218383183841838518386183871838818389183901839118392183931839418395183961839718398183991840018401184021840318404184051840618407184081840918410184111841218413184141841518416184171841818419184201842118422184231842418425184261842718428184291843018431184321843318434184351843618437184381843918440184411844218443184441844518446184471844818449184501845118452184531845418455184561845718458184591846018461184621846318464184651846618467184681846918470184711847218473184741847518476184771847818479184801848118482184831848418485184861848718488184891849018491184921849318494184951849618497184981849918500185011850218503185041850518506185071850818509185101851118512185131851418515185161851718518185191852018521185221852318524185251852618527185281852918530185311853218533185341853518536185371853818539185401854118542185431854418545185461854718548185491855018551185521855318554185551855618557185581855918560185611856218563185641856518566185671856818569185701857118572185731857418575185761857718578185791858018581185821858318584185851858618587185881858918590185911859218593185941859518596185971859818599186001860118602186031860418605186061860718608186091861018611186121861318614186151861618617186181861918620186211
86221862318624186251862618627186281862918630186311863218633186341863518636186371863818639186401864118642186431864418645186461864718648186491865018651186521865318654186551865618657186581865918660186611866218663186641866518666186671866818669186701867118672186731867418675186761867718678186791868018681186821868318684186851868618687186881868918690186911869218693186941869518696186971869818699187001870118702187031870418705187061870718708187091871018711187121871318714187151871618717187181871918720187211872218723187241872518726187271872818729187301873118732187331873418735187361873718738187391874018741187421874318744187451874618747187481874918750187511875218753187541875518756187571875818759187601876118762187631876418765187661876718768187691877018771187721877318774187751877618777187781877918780187811878218783187841878518786187871878818789187901879118792187931879418795187961879718798187991880018801188021880318804188051880618807188081880918810188111881218813188141881518816188171881818819188201882118822188231882418825188261882718828188291883018831188321883318834188351883618837188381883918840188411884218843188441884518846188471884818849188501885118852188531885418855188561885718858188591886018861188621886318864188651886618867188681886918870188711887218873188741887518876188771887818879188801888118882188831888418885188861888718888188891889018891188921889318894188951889618897188981889918900189011890218903189041890518906189071890818909189101891118912189131891418915189161891718918189191892018921189221892318924189251892618927189281892918930189311893218933189341893518936189371893818939189401894118942189431894418945189461894718948189491895018951189521895318954189551895618957189581895918960189611896218963189641896518966189671896818969189701897118972189731897418975189761897718978189791898018981189821898318984189851898618987189881898918990189911899218993189941899518996189971899818999190001900119002190031900419005190061900719008190091901019011190121901319014190151901619017190181901919020190211
9022190231902419025190261902719028190291903019031190321903319034190351903619037190381903919040190411904219043190441904519046190471904819049190501905119052190531905419055190561905719058190591906019061190621906319064190651906619067190681906919070190711907219073190741907519076190771907819079190801908119082190831908419085190861908719088190891909019091190921909319094190951909619097190981909919100191011910219103191041910519106191071910819109191101911119112191131911419115191161911719118191191912019121191221912319124191251912619127191281912919130191311913219133191341913519136191371913819139191401914119142191431914419145191461914719148191491915019151191521915319154191551915619157191581915919160191611916219163191641916519166191671916819169191701917119172191731917419175191761917719178191791918019181191821918319184191851918619187191881918919190191911919219193191941919519196191971919819199192001920119202192031920419205192061920719208192091921019211192121921319214192151921619217192181921919220192211922219223192241922519226192271922819229192301923119232192331923419235192361923719238192391924019241192421924319244192451924619247192481924919250192511925219253192541925519256192571925819259192601926119262192631926419265192661926719268192691927019271192721927319274192751927619277192781927919280192811928219283192841928519286192871928819289192901929119292192931929419295192961929719298192991930019301193021930319304193051930619307193081930919310193111931219313193141931519316193171931819319193201932119322193231932419325193261932719328193291933019331193321933319334193351933619337193381933919340193411934219343
  1. /* sp_int.c
  2. *
  3. * Copyright (C) 2006-2023 wolfSSL Inc.
  4. *
  5. * This file is part of wolfSSL.
  6. *
  7. * wolfSSL is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License as published by
  9. * the Free Software Foundation; either version 2 of the License, or
  10. * (at your option) any later version.
  11. *
  12. * wolfSSL is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. */
  21. /* Implementation by Sean Parkinson. */
  22. /*
  23. DESCRIPTION
  24. This library provides single precision (SP) integer math functions.
  25. */
  26. #ifdef HAVE_CONFIG_H
  27. #include <config.h>
  28. #endif
  29. #include <wolfssl/wolfcrypt/settings.h>
  30. #if defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)
  31. #if (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC)) || \
  32. defined(WOLFSSL_SP_NO_MALLOC)
  33. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  34. !defined(WOLFSSL_SP_NO_DYN_STACK)
  35. #pragma GCC diagnostic push
  36. /* We are statically declaring a variable smaller than sp_int.
  37. * We track available memory in the 'size' field.
  38. * Disable warnings of sp_int being partly outside array bounds of variable.
  39. */
  40. #pragma GCC diagnostic ignored "-Warray-bounds"
  41. #endif
  42. #endif
  43. #ifdef NO_INLINE
  44. #include <wolfssl/wolfcrypt/misc.h>
  45. #else
  46. #define WOLFSSL_MISC_INCLUDED
  47. #include <wolfcrypt/src/misc.c>
  48. #endif
  49. /* SP Build Options:
  50. * WOLFSSL_HAVE_SP_RSA: Enable SP RSA support
  51. * WOLFSSL_HAVE_SP_DH: Enable SP DH support
  52. * WOLFSSL_HAVE_SP_ECC: Enable SP ECC support
  53. * WOLFSSL_SP_MATH: Use only single precision math and algorithms
  54. * it supports (no fastmath tfm.c or normal integer.c)
  55. * WOLFSSL_SP_MATH_ALL Implementation of all MP functions
  56. * (replacement for tfm.c and integer.c)
  57. * WOLFSSL_SP_SMALL: Use smaller version of code and avoid large
  58. * stack variables
  59. * WOLFSSL_SP_NO_MALLOC: Always use stack, no heap XMALLOC/XFREE allowed
  60. * WOLFSSL_SP_NO_2048: Disable RSA/DH 2048-bit support
  61. * WOLFSSL_SP_NO_3072: Disable RSA/DH 3072-bit support
  62. * WOLFSSL_SP_4096 Enable RSA/DH 4096-bit support
  63. * WOLFSSL_SP_NO_256 Disable ECC 256-bit SECP256R1 support
  64. * WOLFSSL_SP_384 Enable ECC 384-bit SECP384R1 support
  65. * WOLFSSL_SP_521 Enable ECC 521-bit SECP521R1 support
  66. * WOLFSSL_SP_ASM Enable assembly speedups (detect platform)
  67. * WOLFSSL_SP_X86_64_ASM Enable Intel x64 assembly implementation
  68. * WOLFSSL_SP_ARM32_ASM Enable Aarch32 assembly implementation
  69. * WOLFSSL_SP_ARM64_ASM Enable Aarch64 assembly implementation
  70. * WOLFSSL_SP_ARM_CORTEX_M_ASM Enable Cortex-M assembly implementation
  71. * WOLFSSL_SP_ARM_THUMB_ASM Enable ARM Thumb assembly implementation
  72. * (used with -mthumb)
  73. * WOLFSSL_SP_X86_64 Enable Intel x86 64-bit assembly speedups
  74. * WOLFSSL_SP_X86 Enable Intel x86 assembly speedups
  75. * WOLFSSL_SP_ARM64 Enable Aarch64 assembly speedups
  76. * WOLFSSL_SP_ARM32 Enable ARM32 assembly speedups
  77. * WOLFSSL_SP_ARM32_UDIV Enable word divide asm that uses UDIV instr
  78. * WOLFSSL_SP_ARM_THUMB Enable ARM Thumb assembly speedups
  79. * (explicitly uses register 'r7')
  80. * WOLFSSL_SP_PPC64 Enable PPC64 assembly speedups
  81. * WOLFSSL_SP_PPC Enable PPC assembly speedups
  82. * WOLFSSL_SP_MIPS64 Enable MIPS64 assembly speedups
  83. * WOLFSSL_SP_MIPS Enable MIPS assembly speedups
  84. * WOLFSSL_SP_RISCV64 Enable RISCV64 assembly speedups
  85. * WOLFSSL_SP_RISCV32 Enable RISCV32 assembly speedups
  86. * WOLFSSL_SP_S390X Enable S390X assembly speedups
  87. * SP_WORD_SIZE Force 32 or 64 bit mode
  88. * WOLFSSL_SP_NONBLOCK Enables "non blocking" mode for SP math, which
  89. * will return FP_WOULDBLOCK for long operations and function must be
  90. * called again until complete.
  91. * WOLFSSL_SP_FAST_NCT_EXPTMOD Enables the faster non-constant time modular
  92. * exponentiation implementation.
  93. * WOLFSSL_SP_INT_NEGATIVE Enables negative values to be used.
  94. * WOLFSSL_SP_INT_DIGIT_ALIGN Enable when unaligned access of sp_int_digit
  95. * pointer is not allowed.
  96. * WOLFSSL_SP_NO_DYN_STACK Disable use of dynamic stack items.
  97. * Dynamic arrays used when not small stack.
  98. * WOLFSSL_SP_FAST_MODEXP Allow fast mod_exp with small C code
  99. * WOLFSSL_SP_LOW_MEM Use algorithms that use less memory.
  100. */
  101. /* TODO: WOLFSSL_SP_SMALL is incompatible with clang-12+ -Os. */
  102. #if defined(__clang__) && defined(__clang_major__) && \
  103. (__clang_major__ >= 12) && defined(WOLFSSL_SP_SMALL)
  104. #undef WOLFSSL_SP_SMALL
  105. #endif
  106. #include <wolfssl/wolfcrypt/sp_int.h>
/* DECL_SP_INT: Declare one variable of type 'sp_int'.
 *
 * Three storage strategies, selected at compile time:
 *  - heap:        declare a NULL pointer; storage comes later from
 *                 ALLOC_SP_INT (XMALLOC).
 *  - C99 stack:   declare a byte array sized for 's' digits and alias it as
 *                 an sp_int. The array is smaller than a full sp_int; the
 *                 'size' field set by ALLOC_SP_INT_SIZE tracks real capacity
 *                 (see the -Warray-bounds pragma note near the top of file).
 *  - plain stack: declare a full-capacity sp_int object.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
/* Declare a variable that will be assigned a value on XMALLOC. */
#define DECL_SP_INT(n, s) \
    sp_int* n = NULL
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
/* Declare a variable on the stack with the required data size.
 * n##d is the backing byte array; 'n' is the sp_int view of it.
 * NOTE(review): assumes n##d is suitably aligned for sp_int — the build
 * gates this path, confirm on new platforms. */
#define DECL_SP_INT(n, s) \
    byte n##d[MP_INT_SIZEOF(s)]; \
    sp_int* (n) = (sp_int*)n##d
#else
/* Declare a variable on the stack (full SP_INT_DIGITS capacity). */
#define DECL_SP_INT(n, s) \
    sp_int n[1]
#endif
#endif
/* ALLOC_SP_INT: Allocate an 'sp_int' of required size.
 *
 * On entry (err) must be MP_OKAY for any work to happen; on exit (err) is
 * MP_VAL if 's' exceeds SP_INT_DIGITS, MP_MEM on allocation failure
 * (heap variant only), otherwise unchanged.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
/* Dynamically allocate just enough data to support size. */
#define ALLOC_SP_INT(n, s, err, h) \
    do { \
        /* Reject sizes beyond the compiled-in digit limit. */ \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
            (err) = MP_VAL; \
        } \
        if ((err) == MP_OKAY) { \
            (n) = (sp_int*)XMALLOC(MP_INT_SIZEOF(s), (h), \
                DYNAMIC_TYPE_BIGINT); \
            if ((n) == NULL) { \
                (err) = MP_MEM; \
            } \
        } \
    } \
    while (0)
/* Dynamically allocate just enough data to support size - and set size. */
#define ALLOC_SP_INT_SIZE(n, s, err, h) \
    do { \
        ALLOC_SP_INT(n, s, err, h); \
        if ((err) == MP_OKAY) { \
            /* NOTE(review): no (unsigned int) cast here, unlike the stack \
             * variant below — confirm 's' is never negative/wider here. */ \
            (n)->size = (s); \
        } \
    } \
    while (0)
#else
/* Array declared on stack - check size is valid. No allocation happens;
 * DECL_SP_INT already reserved the storage. */
#define ALLOC_SP_INT(n, s, err, h) \
    do { \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
            (err) = MP_VAL; \
        } \
    } \
    while (0)
/* Array declared on stack - set the size field. */
#define ALLOC_SP_INT_SIZE(n, s, err, h) \
    do { \
        ALLOC_SP_INT(n, s, err, h); \
        if ((err) == MP_OKAY) { \
            (n)->size = (unsigned int)(s); \
        } \
    } \
    while (0)
#endif
/* FREE_SP_INT: Free an 'sp_int' variable.
 *
 * Heap builds release the XMALLOC'd block; stack builds expand to nothing.
 * Safe to call with (n) == NULL in the heap variant.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
/* Free dynamically allocated data. The NULL guard is kept because XFREE
 * implementations are platform-supplied and may not accept NULL. */
#define FREE_SP_INT(n, h) \
    do { \
        if ((n) != NULL) { \
            XFREE(n, h, DYNAMIC_TYPE_BIGINT); \
        } \
    } \
    while (0)
#else
/* Nothing to do as declared on stack. */
#define FREE_SP_INT(n, h)
#endif
/* Declare a variable that will be assigned a value on XMALLOC.
 * n##d is the single backing allocation; (n)[] holds per-element pointers
 * carved out of it by ALLOC_DYN_SP_INT_ARRAY. */
#define DECL_DYN_SP_INT_ARRAY(n, s, c) \
    sp_int* n##d = NULL; \
    sp_int* (n)[c] = { NULL, }
/* DECL_SP_INT_ARRAY: Declare array of 'sp_int'.
 *
 * Mirrors DECL_SP_INT's three storage strategies, but for 'c' elements
 * of 's' digits each.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
/* Declare a variable that will be assigned a value on XMALLOC. */
#define DECL_SP_INT_ARRAY(n, s, c) \
    DECL_DYN_SP_INT_ARRAY(n, s, c)
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
/* Declare a variable on the stack with the required data size:
 * one contiguous byte array holding all 'c' elements. */
#define DECL_SP_INT_ARRAY(n, s, c) \
    byte n##d[MP_INT_SIZEOF(s) * (c)]; \
    sp_int* (n)[c] = { NULL, }
#else
/* Declare a variable on the stack: 'c' full-size sp_ints plus the
 * pointer array that ALLOC_SP_INT_ARRAY fills in. */
#define DECL_SP_INT_ARRAY(n, s, c) \
    sp_int n##d[c]; \
    sp_int* (n)[c]
#endif
#endif
/* Dynamically allocate just enough data to support multiple sp_ints of the
 * required size. Use pointers into data to make up array and set sizes.
 *
 * On exit (err) is MP_VAL if 's' exceeds SP_INT_DIGITS, MP_MEM on
 * allocation failure, otherwise unchanged.
 */
#define ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h) \
    do { \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
            (err) = MP_VAL; \
        } \
        if ((err) == MP_OKAY) { \
            n##d = (sp_int*)XMALLOC(MP_INT_SIZEOF(s) * (c), (h), \
                DYNAMIC_TYPE_BIGINT); \
            if (n##d == NULL) { \
                (err) = MP_MEM; \
            } \
            else { \
                int n##ii; \
                /* First element at the start of the block; each subsequent \
                 * element presumably MP_INT_SIZEOF(s) bytes further on via \
                 * MP_INT_NEXT. */ \
                (n)[0] = n##d; \
                (n)[0]->size = (s); \
                for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
                    (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
                    (n)[n##ii]->size = (s); \
                } \
            } \
        } \
    } \
    while (0)
/* ALLOC_SP_INT_ARRAY: Allocate an array of 'sp_int's of required size. */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
    ALLOC_DYN_SP_INT_ARRAY(n, s, c, err, h)
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
/* Data declared on stack that supports multiple sp_ints of the
 * required size. Use pointers into data to make up array and set sizes.
 * The sp_int_minimal cast writes 'size' without touching fields beyond the
 * truncated backing array (see the -Warray-bounds pragma note at top of
 * file).
 */
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
    do { \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
            (err) = MP_VAL; \
        } \
        if ((err) == MP_OKAY) { \
            int n##ii; \
            (n)[0] = (sp_int*)n##d; \
            ((sp_int_minimal*)(n)[0])->size = (s); \
            for (n##ii = 1; n##ii < (int)(c); n##ii++) { \
                (n)[n##ii] = MP_INT_NEXT((n)[n##ii-1], s); \
                ((sp_int_minimal*)(n)[n##ii])->size = (s); \
            } \
        } \
    } \
    while (0)
#else
/* Data declared on stack that supports multiple sp_ints of the
 * required size. Set into array and set sizes.
 * Elements here are full sp_ints (n##d[]), so plain member access is fine.
 */
#define ALLOC_SP_INT_ARRAY(n, s, c, err, h) \
    do { \
        if (((err) == MP_OKAY) && ((s) > SP_INT_DIGITS)) { \
            (err) = MP_VAL; \
        } \
        if ((err) == MP_OKAY) { \
            int n##ii; \
            for (n##ii = 0; n##ii < (int)(c); n##ii++) { \
                (n)[n##ii] = &n##d[n##ii]; \
                (n)[n##ii]->size = (s); \
            } \
        } \
    } \
    while (0)
#endif
#endif
/* Free data variable that was dynamically allocated.
 * Safe to call when allocation failed: n##d may be NULL.
 */
#define FREE_DYN_SP_INT_ARRAY(n, h) \
do { \
    if (n##d != NULL) { \
        XFREE(n##d, h, DYNAMIC_TYPE_BIGINT); \
    } \
} \
while (0)
/* FREE_SP_INT_ARRAY: Free an array of 'sp_int'.
 * Must mirror the allocation strategy chosen by ALLOC_SP_INT_ARRAY.
 */
#if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
    !defined(WOLFSSL_SP_NO_MALLOC)
#define FREE_SP_INT_ARRAY(n, h) \
    FREE_DYN_SP_INT_ARRAY(n, h)
#else
/* Nothing to do as data declared on stack. */
#define FREE_SP_INT_ARRAY(n, h)
#endif
#ifndef WOLFSSL_NO_ASM
/* Map the GNU-style asm keywords onto the spellings these compilers use so
 * the SP_ASM_* macros below compile unchanged.
 */
#ifdef __IAR_SYSTEMS_ICC__
#define __asm__        asm
#define __volatile__   volatile
#endif /* __IAR_SYSTEMS_ICC__ */
#ifdef __KEIL__
#define __asm__        __asm
#define __volatile__   volatile
#endif
#if defined(WOLFSSL_SP_X86_64) && SP_WORD_SIZE == 64
/*
 * CPU: x86_64
 *
 * Primitive word operations implemented with GCC extended inline assembly.
 * Naming convention: vl/vh/vo are the low/high/overflow words of a multi-word
 * accumulator; va/vb are operands. mulq produces rdx:rax, hence the rax/rdx
 * clobbers on the multiply macros.
 */
#ifndef _MSC_VER
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "movq $0 , %[o] \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movq %[b], %%rax \n\t" \
        "mulq %[a] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "%rax", "%rdx", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "movq %%rax, %[l] \n\t" \
        "movq %%rdx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va) \
        : "memory", "%rax", "%rdx", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        "adcq $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movq %[a], %%rax \n\t" \
        "mulq %%rax \n\t" \
        "addq %%rax, %[l] \n\t" \
        "adcq %%rdx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "%rax", "%rdx", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subq %[a], %[l] \n\t" \
        "sbbq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Sub va, variable in a register, from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subq %[a], %[l] \n\t" \
        "sbbq $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        "addq %[a], %[l] \n\t" \
        "adcq %[b], %[h] \n\t" \
        "adcq %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Index of highest bit set (va must be non-zero: bsr leaves the result
 * undefined for a zero source).
 */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
    __asm__ __volatile__ ( \
        "bsr %[a], %[i] \n\t" \
        : [i] "=r" (vi) \
        : [a] "r" (va) \
        : "cc" \
    )
#else
/* MSVC build: no GCC inline asm on x64, use compiler intrinsics instead.
 * _umul128 gives the 128-bit product; _addcarry_u64/_subborrow_u64 chain the
 * carry/borrow through the multi-word accumulator.
 */
#include <intrin.h>
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    vl = _umul128(va, vb, &vh)
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
do { \
    vl = _umul128(va, vb, &vh); \
    vo = 0; \
} \
while (0)
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, vb, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    vl = _umul128(va, va, &vh)
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, va, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    c = _addcarry_u64(c, vh, vth, &vh); \
    _addcarry_u64(c, vo, 0, &vo); \
} \
while (0)
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
do { \
    unsigned __int64 vtl, vth; \
    unsigned char c; \
    vtl = _umul128(va, va, &vth); \
    c = _addcarry_u64(0, vl, vtl, &vl); \
    _addcarry_u64(c, vh, vth, &vh); \
} \
while (0)
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    _addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    _addcarry_u64(c, vh, 0, &vh); \
} \
while (0)
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
do { \
    unsigned char c; \
    c = _subborrow_u64(0, vl, va, &vl); \
    _subborrow_u64(c, vh, 0, &vh); \
} \
while (0)
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
do { \
    unsigned char c; \
    c = _addcarry_u64(0, vl, va, &vl); \
    c = _addcarry_u64(c, vh, vb, &vh); \
    _addcarry_u64(c, vo, vc, &vo); \
    c = _addcarry_u64(0, vl, va, &vl); \
    c = _addcarry_u64(c, vh, vb, &vh); \
    _addcarry_u64(c, vo, vc, &vo); \
} \
while (0)
/* Index of highest bit set (va must be non-zero for a defined result). */
#define SP_ASM_HI_BIT_SET_IDX(va, vi) \
do { \
    unsigned long idx; \
    _BitScanReverse64(&idx, va); \
    vi = idx; \
} \
while (0)
#endif
  614. #if !defined(WOLFSSL_SP_DIV_WORD_HALF) && (!defined(_MSC_VER) || \
  615. _MSC_VER >= 1920)
  616. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  617. *
  618. * Using divq instruction on Intel x64.
  619. *
  620. * @param [in] hi SP integer digit. High digit of the dividend.
  621. * @param [in] lo SP integer digit. Lower digit of the dividend.
  622. * @param [in] d SP integer digit. Number to divide by.
  623. * @return The division result.
  624. */
  625. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  626. sp_int_digit d)
  627. {
  628. #ifndef _MSC_VER
  629. __asm__ __volatile__ (
  630. "divq %2"
  631. : "+a" (lo)
  632. : "d" (hi), "r" (d)
  633. : "cc"
  634. );
  635. return lo;
  636. #elif defined(_MSC_VER) && _MSC_VER >= 1920
  637. return _udiv128(hi, lo, d, NULL);
  638. #endif
  639. }
  640. #define SP_ASM_DIV_WORD
  641. #endif
  642. #define SP_INT_ASM_AVAILABLE
  643. #endif /* WOLFSSL_SP_X86_64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_X86) && SP_WORD_SIZE == 32
/*
 * CPU: x86
 *
 * 32-bit variants of the primitive word operations: mull produces edx:eax,
 * hence the eax/edx clobbers on the multiply macros.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va), [b] "m" (vb) \
        : "memory", "eax", "edx", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "movl $0 , %[o] \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "eax", "edx", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movl %[b], %%eax \n\t" \
        "mull %[a] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "m" (va), [b] "m" (vb) \
        : "eax", "edx", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "movl %%eax, %[l] \n\t" \
        "movl %%edx, %[h] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "m" (va) \
        : "memory", "eax", "edx", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        "adcl $0 , %[o] \n\t" \
        : [l] "+rm" (vl), [h] "+rm" (vh), [o] "+rm" (vo) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "movl %[a], %%eax \n\t" \
        "mull %%eax \n\t" \
        "addl %%eax, %[l] \n\t" \
        "adcl %%edx, %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "eax", "edx", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Add va, variable in a register, into: vh | vl */
#define SP_ASM_ADDC_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subl %[a], %[l] \n\t" \
        "sbbl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "m" (va) \
        : "cc" \
    )
/* Sub va, variable in a register, from: vh | vl */
#define SP_ASM_SUBB_REG(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subl %[a], %[l] \n\t" \
        "sbbl $0 , %[h] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        "addl %[a], %[l] \n\t" \
        "adcl %[b], %[h] \n\t" \
        "adcl %[c], %[o] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
  808. /* Index of highest bit set. */
  809. #define SP_ASM_HI_BIT_SET_IDX(va, vi) \
  810. __asm__ __volatile__ ( \
  811. "bsr %[a], %[i] \n\t" \
  812. : [i] "=r" (vi) \
  813. : [a] "r" (va) \
  814. : "cC" \
  815. )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using divl instruction on x86 (32-bit).
 * Requires hi < d, otherwise the quotient overflows a digit and divl faults.
 *
 * @param [in] hi  SP integer digit. High digit of the dividend.
 * @param [in] lo  SP integer digit. Lower digit of the dividend.
 * @param [in] d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    /* divl takes the dividend in edx:eax and leaves the quotient in eax. */
    __asm__ __volatile__ (
        "divl %2"
        : "+a" (lo)
        : "d" (hi), "r" (d)
        : "cc"
    );
    return lo;
}
#define SP_ASM_DIV_WORD
#endif
  839. #define SP_INT_ASM_AVAILABLE
  840. #endif /* WOLFSSL_SP_X86 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_ARM64) && SP_WORD_SIZE == 64
/*
 * CPU: Aarch64
 *
 * mul/umulh produce the low/high 64 bits of the product; adds/adcs/adc chain
 * the carry through the accumulator. x8/x9 are scratch registers.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va), [b] "r" (vb) \
        : "memory", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh %[h], %[a], %[b] \n\t" \
        "mov %[l], x8 \n\t" \
        "mov %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8" \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result into: vh | vl */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[b] \n\t" \
        "umulh x9, %[a], %[b] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb) \
        : "x8", "x9", "cc" \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul %[l], %[a], %[a] \n\t" \
        "umulh %[h], %[a], %[a] \n\t" \
        : [h] "+r" (vh), [l] "+r" (vl) \
        : [a] "r" (va) \
        : "memory" \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adcs %[h], %[h], x9 \n\t" \
        "adc %[o], %[o], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va) \
    __asm__ __volatile__ ( \
        "mul x8, %[a], %[a] \n\t" \
        "umulh x9, %[a], %[a] \n\t" \
        "adds %[l], %[l], x8 \n\t" \
        "adc %[h], %[h], x9 \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "x8", "x9", "cc" \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "sbc %[h], %[h], xzr \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh) \
        : [a] "r" (va) \
        : "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
/* Count leading zeros. */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "clz %[n], %[a] \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
#ifndef WOLFSSL_SP_DIV_WORD_HALF
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on Aarch64.
 * Constant time.
 *
 * Normalizes d so its top 16-bit chunk is non-zero, then refines a quotient
 * estimate built from 32-bit udiv steps, subtracting d * estimate from the
 * dividend each round before a final exact udiv by d.
 *
 * @param [in] hi  SP integer digit. High digit of the dividend.
 * @param [in] lo  SP integer digit. Lower digit of the dividend.
 * @param [in] d   SP integer digit. Number to divide by.
 * @return  The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
    __asm__ __volatile__ (
        /* Shift d (and the dividend) left by 16 when d's top 16 bits are
         * zero, so the estimate divisor below is accurate enough. */
        "lsr x3, %[d], 48\n\t"
        "mov x5, 16\n\t"
        "cmp x3, 0\n\t"
        "mov x4, 63\n\t"
        "csel x3, x5, xzr, eq\n\t"
        "sub x4, x4, x3\n\t"
        "lsl %[d], %[d], x3\n\t"
        "lsl %[hi], %[hi], x3\n\t"
        "lsr x5, %[lo], x4\n\t"
        "lsl %[lo], %[lo], x3\n\t"
        "orr %[hi], %[hi], x5, lsr 1\n\t"
        /* x5 = (d >> 32) + 1: over-estimate divisor for safe quotients. */
        "lsr x5, %[d], 32\n\t"
        "add x5, x5, 1\n\t"
        "udiv x3, %[hi], x5\n\t"
        "lsl x6, x3, 32\n\t"
        "mul x4, %[d], x6\n\t"
        "umulh x3, %[d], x6\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        "udiv x3, %[hi], x5\n\t"
        "lsl x3, x3, 32\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "umulh x3, %[d], x3\n\t"
        "subs %[lo], %[lo], x4\n\t"
        "sbc %[hi], %[hi], x3\n\t"
        "lsr x3, %[lo], 32\n\t"
        "orr x3, x3, %[hi], lsl 32\n\t"
        "udiv x3, x3, x5\n\t"
        "add x6, x6, x3\n\t"
        "mul x4, %[d], x3\n\t"
        "sub %[lo], %[lo], x4\n\t"
        /* Final exact correction and sum of partial quotients. */
        "udiv x3, %[lo], %[d]\n\t"
        "add %[hi], x6, x3\n\t"
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
        :
        : "x3", "x4", "x5", "x6", "cc"
    );
    return hi;
}
#define SP_ASM_DIV_WORD
#endif
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_ARM64 && SP_WORD_SIZE == 64 */
  1057. #if (defined(WOLFSSL_SP_ARM32) || defined(WOLFSSL_SP_ARM_CORTEX_M)) && \
  1058. SP_WORD_SIZE == 32
  1059. /*
  1060. * CPU: ARM32 or Cortex-M4 and similar
  1061. */
  1062. /* Multiply va by vb and store double size result in: vh | vl */
  1063. #define SP_ASM_MUL(vl, vh, va, vb) \
  1064. __asm__ __volatile__ ( \
  1065. "umull %[l], %[h], %[a], %[b] \n\t" \
  1066. : [h] "+r" (vh), [l] "+r" (vl) \
  1067. : [a] "r" (va), [b] "r" (vb) \
  1068. : "memory" \
  1069. )
  1070. /* Multiply va by vb and store double size result in: vo | vh | vl */
  1071. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  1072. __asm__ __volatile__ ( \
  1073. "umull %[l], %[h], %[a], %[b] \n\t" \
  1074. "mov %[o], #0 \n\t" \
  1075. : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
  1076. : [a] "r" (va), [b] "r" (vb) \
  1077. : \
  1078. )
  1079. /* Multiply va by vb and add double size result into: vo | vh | vl */
  1080. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  1081. __asm__ __volatile__ ( \
  1082. "umull r8, r9, %[a], %[b] \n\t" \
  1083. "adds %[l], %[l], r8 \n\t" \
  1084. "adcs %[h], %[h], r9 \n\t" \
  1085. "adc %[o], %[o], #0 \n\t" \
  1086. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1087. : [a] "r" (va), [b] "r" (vb) \
  1088. : "r8", "r9", "cc" \
  1089. )
  1090. /* Multiply va by vb and add double size result into: vh | vl */
  1091. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  1092. __asm__ __volatile__ ( \
  1093. "umlal %[l], %[h], %[a], %[b] \n\t" \
  1094. : [l] "+r" (vl), [h] "+r" (vh) \
  1095. : [a] "r" (va), [b] "r" (vb) \
  1096. : \
  1097. )
  1098. /* Multiply va by vb and add double size result twice into: vo | vh | vl */
  1099. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  1100. __asm__ __volatile__ ( \
  1101. "umull r8, r9, %[a], %[b] \n\t" \
  1102. "adds %[l], %[l], r8 \n\t" \
  1103. "adcs %[h], %[h], r9 \n\t" \
  1104. "adc %[o], %[o], #0 \n\t" \
  1105. "adds %[l], %[l], r8 \n\t" \
  1106. "adcs %[h], %[h], r9 \n\t" \
  1107. "adc %[o], %[o], #0 \n\t" \
  1108. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1109. : [a] "r" (va), [b] "r" (vb) \
  1110. : "r8", "r9", "cc" \
  1111. )
  1112. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  1113. * Assumes first add will not overflow vh | vl
  1114. */
  1115. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  1116. __asm__ __volatile__ ( \
  1117. "umull r8, r9, %[a], %[b] \n\t" \
  1118. "adds %[l], %[l], r8 \n\t" \
  1119. "adc %[h], %[h], r9 \n\t" \
  1120. "adds %[l], %[l], r8 \n\t" \
  1121. "adcs %[h], %[h], r9 \n\t" \
  1122. "adc %[o], %[o], #0 \n\t" \
  1123. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1124. : [a] "r" (va), [b] "r" (vb) \
  1125. : "r8", "r9", "cc" \
  1126. )
  1127. /* Square va and store double size result in: vh | vl */
  1128. #define SP_ASM_SQR(vl, vh, va) \
  1129. __asm__ __volatile__ ( \
  1130. "umull %[l], %[h], %[a], %[a] \n\t" \
  1131. : [h] "+r" (vh), [l] "+r" (vl) \
  1132. : [a] "r" (va) \
  1133. : "memory" \
  1134. )
  1135. /* Square va and add double size result into: vo | vh | vl */
  1136. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  1137. __asm__ __volatile__ ( \
  1138. "umull r8, r9, %[a], %[a] \n\t" \
  1139. "adds %[l], %[l], r8 \n\t" \
  1140. "adcs %[h], %[h], r9 \n\t" \
  1141. "adc %[o], %[o], #0 \n\t" \
  1142. : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
  1143. : [a] "r" (va) \
  1144. : "r8", "r9", "cc" \
  1145. )
  1146. /* Square va and add double size result into: vh | vl */
  1147. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1148. __asm__ __volatile__ ( \
  1149. "umlal %[l], %[h], %[a], %[a] \n\t" \
  1150. : [l] "+r" (vl), [h] "+r" (vh) \
  1151. : [a] "r" (va) \
  1152. : "cc" \
  1153. )
  1154. /* Add va into: vh | vl */
  1155. #define SP_ASM_ADDC(vl, vh, va) \
  1156. __asm__ __volatile__ ( \
  1157. "adds %[l], %[l], %[a] \n\t" \
  1158. "adc %[h], %[h], #0 \n\t" \
  1159. : [l] "+r" (vl), [h] "+r" (vh) \
  1160. : [a] "r" (va) \
  1161. : "cc" \
  1162. )
  1163. /* Sub va from: vh | vl */
  1164. #define SP_ASM_SUBB(vl, vh, va) \
  1165. __asm__ __volatile__ ( \
  1166. "subs %[l], %[l], %[a] \n\t" \
  1167. "sbc %[h], %[h], #0 \n\t" \
  1168. : [l] "+r" (vl), [h] "+r" (vh) \
  1169. : [a] "r" (va) \
  1170. : "cc" \
  1171. )
/* Add two times vc | vb | va into vo | vh | vl
 * Performed as two full 96-bit carry-chained additions. */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[h], %[b] \n\t" \
        "adc %[o], %[o], %[c] \n\t" \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc) \
        : "cc" \
    )
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH >= 7)
/* Count leading zeros - instruction only available on ARMv7 and newer.
 * Sets vn to the number of leading zero bits in va. */
#define SP_ASM_LZCNT(va, vn) \
    __asm__ __volatile__ ( \
        "clz %[n], %[a] \n\t" \
        : [n] "=r" (vn) \
        : [a] "r" (va) \
        : \
    )
#endif
  1195. #ifndef WOLFSSL_SP_DIV_WORD_HALF
  1196. #ifndef WOLFSSL_SP_ARM32_UDIV
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * No division instruction used - does operation bit by bit.
 * Constant time.
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Lower digit of the dividend.
 * @param [in] d SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
    sp_int_digit r = 0;
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
    /* De Bruijn lookup used to emulate CLZ on pre-ARMv7 cores: after
     * smearing the top set bit, (v * debruijn32_mul) >> 27 indexes this
     * table to give the leading-zero count. */
    static const char debruijn32[32] = {
        0, 31, 9, 30, 3, 8, 13, 29, 2, 5, 7, 21, 12, 24, 28, 19,
        1, 10, 4, 14, 6, 22, 25, 20, 11, 15, 23, 26, 16, 27, 17, 18
    };
    static const sp_uint32 debruijn32_mul = 0x076be629;
#endif
    __asm__ __volatile__ (
        /* Shift d so that top bit is set. */
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
        "ldr r4, %[m]\n\t"
        "mov r5, %[d]\n\t"
        /* Smear the highest set bit of d down through all lower bits. */
        "orr r5, r5, r5, lsr #1\n\t"
        "orr r5, r5, r5, lsr #2\n\t"
        "orr r5, r5, r5, lsr #4\n\t"
        "orr r5, r5, r5, lsr #8\n\t"
        "orr r5, r5, r5, lsr #16\n\t"
        "add r5, r5, #1\n\t"
        "mul r5, r5, r4\n\t"
        "lsr r5, r5, #27\n\t"
        "ldrb r5, [%[t], r5]\n\t"
#else
        "clz r5, %[d]\n\t"
#endif
        /* r5 = leading zero count of d; normalize divisor and dividend. */
        "rsb r6, r5, #31\n\t"
        "lsl %[d], %[d], r5\n\t"
        "lsl %[hi], %[hi], r5\n\t"
        /* lo is shifted right by (31 - r5) then one more below so the
         * combined right shift is 32 - r5 and is safe when r5 == 0. */
        "lsr r9, %[lo], r6\n\t"
        "lsl %[lo], %[lo], r5\n\t"
        "orr %[hi], %[hi], r9, lsr #1\n\t"
        /* r5 = (d >> 1) + 1: threshold for the conditional subtract. */
        "lsr r5, %[d], #1\n\t"
        "add r5, r5, #1\n\t"
        "mov r6, %[lo]\n\t"
        "mov r9, %[hi]\n\t"
        /* Do top 32 */
        "subs r8, r5, r9\n\t"
        /* r8 = -borrow: all ones when r9 >= r5, else zero (branchless mask). */
        "sbc r8, r8, r8\n\t"
        "add %[r], %[r], %[r]\n\t"
        "sub %[r], %[r], r8\n\t"
        "and r8, r8, r5\n\t"
        "subs r9, r9, r8\n\t"
        /* Next 30 bits */
        "mov r4, #29\n\t"
        "\n1:\n\t"
        /* Shift next dividend bit into the running remainder r9. */
        "movs r6, r6, lsl #1\n\t"
        "adc r9, r9, r9\n\t"
        "subs r8, r5, r9\n\t"
        "sbc r8, r8, r8\n\t"
        "add %[r], %[r], %[r]\n\t"
        "sub %[r], %[r], r8\n\t"
        "and r8, r8, r5\n\t"
        "subs r9, r9, r8\n\t"
        "subs r4, r4, #1\n\t"
        "bpl 1b\n\t"
        "add %[r], %[r], %[r]\n\t"
        "add %[r], %[r], #1\n\t"
        /* Handle difference has hi word > 0. */
        "umull r4, r5, %[r], %[d]\n\t"
        "subs r4, %[lo], r4\n\t"
        "sbc r5, %[hi], r5\n\t"
        "add %[r], %[r], r5\n\t"
        "umull r4, r5, %[r], %[d]\n\t"
        "subs r4, %[lo], r4\n\t"
        "sbc r5, %[hi], r5\n\t"
        "add %[r], %[r], r5\n\t"
        /* Add 1 to result if bottom half of difference is >= d. */
        "mul r4, %[r], %[d]\n\t"
        "subs r4, %[lo], r4\n\t"
        "subs r9, %[d], r4\n\t"
        "sbc r8, r8, r8\n\t"
        "sub %[r], %[r], r8\n\t"
        "subs r9, r9, #1\n\t"
        "sbc r8, r8, r8\n\t"
        "sub %[r], %[r], r8\n\t"
        : [r] "+r" (r), [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
#if defined(WOLFSSL_SP_ARM_ARCH) && (WOLFSSL_SP_ARM_ARCH < 7)
        : [t] "r" (debruijn32), [m] "m" (debruijn32_mul)
#else
        :
#endif
        : "r4", "r5", "r6", "r8", "r9", "cc"
    );
    return r;
}
  1295. #else
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * Using udiv instruction on arm32
 * Constant time.
 *
 * @param [in] hi SP integer digit. High digit of the dividend.
 * @param [in] lo SP integer digit. Lower digit of the dividend.
 * @param [in] d SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
    sp_int_digit d)
{
    __asm__ __volatile__ (
        /* If the top byte of d is clear, normalize by shifting d and the
         * dividend left by 8 so the 16-bit estimate below is accurate. */
        "lsrs r3, %[d], #24\n\t"
        "it eq\n\t"
        "moveq r3, #8\n\t"
        "it ne\n\t"
        "movne r3, #0\n\t"
        "rsb r4, r3, #31\n\t"
        "lsl %[d], %[d], r3\n\t"
        "lsl %[hi], %[hi], r3\n\t"
        /* Extra lsr #1 makes the combined shift 32 - r3, safe when r3 == 0. */
        "lsr r5, %[lo], r4\n\t"
        "lsl %[lo], %[lo], r3\n\t"
        "orr %[hi], %[hi], r5, lsr #1\n\t"
        /* r5 = (d >> 16) + 1: divisor estimate for quotient digits. */
        "lsr r5, %[d], 16\n\t"
        "add r5, r5, 1\n\t"
        /* Estimate top quotient bits, subtract q*d from hi:lo (in r6). */
        "udiv r3, %[hi], r5\n\t"
        "lsl r6, r3, 16\n\t"
        "umull r4, r3, %[d], r6\n\t"
        "subs %[lo], %[lo], r4\n\t"
        "sbc %[hi], %[hi], r3\n\t"
        /* Refine: estimate was low by at most a bounded amount. */
        "udiv r3, %[hi], r5\n\t"
        "lsl r3, r3, 16\n\t"
        "add r6, r6, r3\n\t"
        "umull r4, r3, %[d], r3\n\t"
        "subs %[lo], %[lo], r4\n\t"
        "sbc %[hi], %[hi], r3\n\t"
        /* Next 16 bits of quotient from the reduced remainder. */
        "lsr r3, %[lo], 16\n\t"
        "orr r3, r3, %[hi], lsl 16\n\t"
        "udiv r3, r3, r5\n\t"
        "add r6, r6, r3\n\t"
        "umull r4, r3, %[d], r3\n\t"
        "subs %[lo], %[lo], r4\n\t"
        "sbc %[hi], %[hi], r3\n\t"
        "lsr r3, %[lo], 16\n\t"
        "orr r3, r3, %[hi], lsl 16\n\t"
        "udiv r3, r3, r5\n\t"
        "add r6, r6, r3\n\t"
        "mul r4, %[d], r3\n\t"
        "sub %[lo], %[lo], r4\n\t"
        /* Final correction: remainder may contain one more multiple of d. */
        "udiv r3, %[lo], %[d]\n\t"
        "add %[hi], r6, r3\n\t"
        : [hi] "+r" (hi), [lo] "+r" (lo), [d] "+r" (d)
        :
        : "r3", "r4", "r5", "r6", "cc"
    );
    return hi;
}
  1355. #endif
  1356. #define SP_ASM_DIV_WORD
  1357. #endif
  1358. #define SP_INT_ASM_AVAILABLE
  1359. #endif /* (WOLFSSL_SP_ARM32 || ARM_CORTEX_M) && SP_WORD_SIZE == 32 */
  1360. #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
  1361. /*
  1362. * CPU: ARM Thumb (like Cortex-M0)
  1363. */
  1364. /* Compile with -fomit-frame-pointer, or similar, if compiler complains about
  1365. * usage of register 'r7'.
  1366. */
  1367. #if defined(__clang__)
/* Multiply va by vb and store double size result in: vh | vl
 * Thumb-1 has no 32x32->64 multiply, so four 16x16 partial products
 * (al*bl, al*bh, ah*bh, ah*bl) are formed with muls and carry-combined. */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "muls %[l], r6 \n\t" \
        /* al * bh */ \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs %[h], r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "muls r4, r6 \n\t" \
        "adds %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * vo is set to zero (the 64-bit product cannot reach the third word). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "muls %[l], r6 \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs %[h], r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        /* vo doubles as the zero constant from here on. */ \
        "movs %[o], #0 \n\t" \
        "adcs %[h], %[o] \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "cc" \
    )
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Fast variant: keeps zero in r5 and a second temp in r7 throughout.
 * Requires r7 to be available (see note above about frame pointers). */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Small/debug variant: avoids r7 by reusing r5 for both the partial
 * product and the zero constant (re-zeroed after each use). */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "cc" \
    )
#endif
/* Multiply va by vb and add double size result into: vh | vl
 * No overflow word: caller guarantees the accumulate fits in vh | vl. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r4, %[b] \n\t" \
        "muls r4, r6 \n\t" \
        "adds %[l], %[l], r4 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        /* al * bh */ \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r4, r6 \n\t" \
        "adds %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Fast variant: each partial product is accumulated twice; zero kept in r5. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Small/debug variant: va's register is parked in r8 so %[a] can be
 * reused as the zero constant; the original value is restored at the end. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movs r8, %[a] \n\t" \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[h], %[a] \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[a] \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* ah * bh */ \
        "movs %[a], r8 \n\t" \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r5, r6 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "movs %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
#endif
#ifndef DEBUG
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "adcs %[h], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        /* vo only starts absorbing carries from this point on. */ \
        "adcs %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r7 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * Debug variant: avoids r7 by parking va's register in r8 and reusing
 * %[a] as the zero constant; original value is restored at the end.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movs r8, %[a] \n\t" \
        /* al * bl */ \
        "uxth r5, %[a] \n\t" \
        "uxth r6, %[b] \n\t" \
        "muls r6, r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[h], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[a] \n\t" \
        /* al * bh */ \
        "lsrs r6, %[b], #16 \n\t" \
        "muls r5, r6 \n\t" \
        "lsrs r6, r5, #16 \n\t" \
        "lsls r5, r5, #16 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], r6 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], r6 \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* ah * bh */ \
        "movs %[a], r8 \n\t" \
        "lsrs r5, %[a], #16 \n\t" \
        "lsrs r6, %[b], #16 \n\t" \
        "muls r6, r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adds %[h], %[h], r6 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[h], %[h], r6 \n\t" \
        "adcs %[o], %[a] \n\t" \
        /* ah * bl */ \
        "uxth r6, %[b] \n\t" \
        "muls r5, r6 \n\t" \
        "lsrs r6, r5, #16 \n\t" \
        "lsls r5, r5, #16 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], r6 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], r6 \n\t" \
        "adcs %[o], %[a] \n\t" \
        "movs %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
#endif
/* Square va and store double size result in: vh | vl
 * a^2 = al*al + 2*al*ah*2^16 + ah*ah*2^32; the cross product is doubled
 * by shifting left 17 (low part) / right 15 (high part). */
#define SP_ASM_SQR(vl, vh, va) \
    __asm__ __volatile__ ( \
        "lsrs r5, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        "mov %[l], r6 \n\t" \
        "mov %[h], r5 \n\t" \
        /* al * al */ \
        "muls %[l], %[l] \n\t" \
        /* ah * ah */ \
        "muls %[h], %[h] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r5 \n\t" \
        "lsrs r5, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r5 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va) \
        : "r5", "r6", "cc" \
    )
/* Square va and add double size result into: vo | vh | vl
 * r4 holds the high-half products, r6 the low-half, r5 stays zero. */
#define SP_ASM_SQR_ADD(vl, vh, vo, va) \
    __asm__ __volatile__ ( \
        "lsrs r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* al * al */ \
        "muls r6, r6 \n\t" \
        /* ah * ah */ \
        "muls r4, r4 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], r5 \n\t" \
        "lsrs r4, %[a], #16 \n\t" \
        "uxth r6, %[a] \n\t" \
        /* 2 * al * ah */ \
        "muls r6, r4 \n\t" \
        "lsrs r4, r6, #15 \n\t" \
        "lsls r6, r6, #17 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], r4 \n\t" \
        "adcs %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va) \
        : "r4", "r5", "r6", "cc" \
    )
  1792. /* Square va and add double size result into: vh | vl */
  1793. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  1794. __asm__ __volatile__ ( \
  1795. "lsrs r6, %[a], #16 \n\t" \
  1796. "uxth r6, %[a] \n\t" \
  1797. /* al * al */ \
  1798. "muls r6, r6 \n\t" \
  1799. /* ah * ah */ \
  1800. "muls r6, r6 \n\t" \
  1801. "adds %[l], %[l], r6 \n\t" \
  1802. "adcs %[h], r6 \n\t" \
  1803. "lsrs r6, %[a], #16 \n\t" \
  1804. "uxth r6, %[a] \n\t" \
  1805. /* 2 * al * ah */ \
  1806. "muls r6, r6 \n\t" \
  1807. "lsrs r6, r6, #15 \n\t" \
  1808. "lsls r6, r6, #17 \n\t" \
  1809. "adds %[l], %[l], r6 \n\t" \
  1810. "adcs %[h], r6 \n\t" \
  1811. : [l] "+l" (vl), [h] "+l" (vh) \
  1812. : [a] "l" (va) \
  1813. : "r5", "r6", "cc" \
  1814. )
/* Add va into: vh | vl
 * Thumb-1 adc needs a register zero, hence the movs into r5. */
#define SP_ASM_ADDC(vl, vh, va) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Sub va from: vh | vl
 * Thumb-1 sbc needs a register zero, hence the movs into r5. */
#define SP_ASM_SUBB(vl, vh, va) \
    __asm__ __volatile__ ( \
        "subs %[l], %[l], %[a] \n\t" \
        "movs r5, #0 \n\t" \
        "sbcs %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va) \
        : "r5", "cc" \
    )
/* Add two times vc | vb | va into vo | vh | vl
 * Two full 96-bit carry-chained additions. */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
    __asm__ __volatile__ ( \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[b] \n\t" \
        "adcs %[o], %[c] \n\t" \
        "adds %[l], %[l], %[a] \n\t" \
        "adcs %[h], %[b] \n\t" \
        "adcs %[o], %[c] \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
        : "cc" \
    )
  1848. #elif defined(WOLFSSL_KEIL)
/* Multiply va by vb and store double size result in: vh | vl
 * Keil syntax: three-operand muls; same 16x16 partial-product scheme
 * as the clang variant above. */
#define SP_ASM_MUL(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "muls %[l], r6, %[l] \n\t" \
        /* al * bh */ \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r6, r4, r6 \n\t" \
        "lsrs %[h], r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "muls r4, r6, r4 \n\t" \
        "adds %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "muls r6, r4, r6 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r4 \n\t" \
        : [h] "+l" (vh), [l] "+l" (vl) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * vo is set to zero and doubles as the zero constant for the carries. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth %[l], %[b] \n\t" \
        "muls %[l], r6, %[l] \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs %[h], r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "movs %[o], #0 \n\t" \
        "adcs %[h], %[h], %[o] \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "muls r5, r6, r5 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "cc" \
    )
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Fast variant: zero kept in r5, second temp in r7. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6, r7 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7, r6 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6, r7 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7, r6 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Small/debug variant: avoids r7 by reusing r5 as both partial product
 * and zero constant (re-zeroed after each use). */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "muls r5, r6, r5 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r5, r6, r5 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "cc" \
    )
#endif
/* Multiply va by vb and add double size result into: vh | vl
 * No overflow word: caller guarantees the accumulate fits in vh | vl. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r4, %[b] \n\t" \
        "muls r4, r6, r4 \n\t" \
        "adds %[l], %[l], r4 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        /* al * bh */ \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r6, r4, r6 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r4 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r4, %[b], #16 \n\t" \
        "muls r4, r6, r4 \n\t" \
        "adds %[h], %[h], r4 \n\t" \
        /* ah * bl */ \
        "uxth r4, %[b] \n\t" \
        "muls r6, r4, r6 \n\t" \
        "lsrs r4, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r4 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r4", "r5", "r6", "cc" \
    )
#if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Fast variant: each partial product is accumulated twice; zero kept in r5. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r7, %[b] \n\t" \
        "muls r7, r6, r7 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "movs r5, #0 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        "adds %[l], %[l], r7 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* al * bh */ \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r6, r7, r6 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bh */ \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r7, %[b], #16 \n\t" \
        "muls r7, r6, r7 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        "adds %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        /* ah * bl */ \
        "uxth r7, %[b] \n\t" \
        "muls r6, r7, r6 \n\t" \
        "lsrs r7, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r7 \n\t" \
        "adcs %[o], %[o], r5 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r7", "cc" \
    )
#else
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Small/debug variant: va's register is parked in r8 so %[a] can be
 * reused as the zero constant; the original value is restored at the end. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
    __asm__ __volatile__ ( \
        "movs r8, %[a] \n\t" \
        /* al * bl */ \
        "uxth r6, %[a] \n\t" \
        "uxth r5, %[b] \n\t" \
        "muls r5, r6, r5 \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r5 \n\t" \
        "adcs %[h], %[h], %[a] \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* al * bh */ \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bh */ \
        "movs %[a], r8 \n\t" \
        "lsrs r6, %[a], #16 \n\t" \
        "lsrs r5, %[b], #16 \n\t" \
        "muls r5, r6, r5 \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "movs %[a], #0 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        /* ah * bl */ \
        "uxth r5, %[b] \n\t" \
        "muls r6, r5, r6 \n\t" \
        "lsrs r5, r6, #16 \n\t" \
        "lsls r6, r6, #16 \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "adds %[l], %[l], r6 \n\t" \
        "adcs %[h], %[h], r5 \n\t" \
        "adcs %[o], %[o], %[a] \n\t" \
        "movs %[a], r8 \n\t" \
        : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
        : [a] "l" (va), [b] "l" (vb) \
        : "r5", "r6", "r8", "cc" \
    )
#endif
  2124. #ifndef DEBUG
  2125. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  2126. * Assumes first add will not overflow vh | vl
 * (so the al*bl accumulation can skip the carry into vo).
 * Splits va/vb into 16-bit halves; r5 holds the constant zero while
 * r6/r7 carry the partial products.
  2127. */
  2128. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  2129. __asm__ __volatile__ ( \
  2130. /* al * bl */ \
  2131. "uxth r6, %[a] \n\t" \
  2132. "uxth r7, %[b] \n\t" \
  2133. "muls r7, r6, r7 \n\t" \
  2134. "adds %[l], %[l], r7 \n\t" \
  2135. "movs r5, #0 \n\t" \
  2136. "adcs %[h], %[h], r5 \n\t" \
  2137. "adds %[l], %[l], r7 \n\t" \
  2138. "adcs %[h], %[h], r5 \n\t" \
  2139. /* al * bh */ \
  2140. "lsrs r7, %[b], #16 \n\t" \
  2141. "muls r6, r7, r6 \n\t" \
  2142. "lsrs r7, r6, #16 \n\t" \
  2143. "lsls r6, r6, #16 \n\t" \
  2144. "adds %[l], %[l], r6 \n\t" \
  2145. "adcs %[h], %[h], r7 \n\t" \
  2146. "adds %[l], %[l], r6 \n\t" \
  2147. "adcs %[h], %[h], r7 \n\t" \
  2148. "adcs %[o], %[o], r5 \n\t" \
  2149. /* ah * bh */ \
  2150. "lsrs r6, %[a], #16 \n\t" \
  2151. "lsrs r7, %[b], #16 \n\t" \
  2152. "muls r7, r6, r7 \n\t" \
  2153. "adds %[h], %[h], r7 \n\t" \
  2154. "adcs %[o], %[o], r5 \n\t" \
  2155. "adds %[h], %[h], r7 \n\t" \
  2156. "adcs %[o], %[o], r5 \n\t" \
  2157. /* ah * bl */ \
  2158. "uxth r7, %[b] \n\t" \
  2159. "muls r6, r7, r6 \n\t" \
  2160. "lsrs r7, r6, #16 \n\t" \
  2161. "lsls r6, r6, #16 \n\t" \
  2162. "adds %[l], %[l], r6 \n\t" \
  2163. "adcs %[h], %[h], r7 \n\t" \
  2164. "adcs %[o], %[o], r5 \n\t" \
  2165. "adds %[l], %[l], r6 \n\t" \
  2166. "adcs %[h], %[h], r7 \n\t" \
  2167. "adcs %[o], %[o], r5 \n\t" \
  2168. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2169. : [a] "l" (va), [b] "l" (vb) \
  2170. : "r5", "r6", "r7", "cc" \
  2171. )
  2172. #else
  2173. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  2174. * Assumes first add will not overflow vh | vl
 * (so the al*bl accumulation can skip the carry into vo).
 * Variant with only r5/r6 as low scratch registers: va is parked in the
 * high register r8 and %[a] doubles as the constant zero; %[a] is
 * restored from r8 before the asm ends.
 * NOTE(review): %[a] is an input operand written and then restored
 * inside the asm - confirm this is acceptable for the targeted
 * toolchains.
  2175. */
  2176. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  2177. __asm__ __volatile__ ( \
  2178. "movs r8, %[a] \n\t" \
  2179. /* al * bl */ \
  2180. "uxth r5, %[a] \n\t" \
  2181. "uxth r6, %[b] \n\t" \
  2182. "muls r6, r5, r6 \n\t" \
  2183. "adds %[l], %[l], r6 \n\t" \
  2184. "movs %[a], #0 \n\t" \
  2185. "adcs %[h], %[h], %[a] \n\t" \
  2186. "adds %[l], %[l], r6 \n\t" \
  2187. "adcs %[h], %[h], %[a] \n\t" \
  2188. /* al * bh */ \
  2189. "lsrs r6, %[b], #16 \n\t" \
  2190. "muls r5, r6, r5 \n\t" \
  2191. "lsrs r6, r5, #16 \n\t" \
  2192. "lsls r5, r5, #16 \n\t" \
  2193. "adds %[l], %[l], r5 \n\t" \
  2194. "adcs %[h], %[h], r6 \n\t" \
  2195. "adds %[l], %[l], r5 \n\t" \
  2196. "adcs %[h], %[h], r6 \n\t" \
  2197. "adcs %[o], %[o], %[a] \n\t" \
  2198. /* ah * bh */ \
  2199. "movs %[a], r8 \n\t" \
  2200. "lsrs r5, %[a], #16 \n\t" \
  2201. "lsrs r6, %[b], #16 \n\t" \
  2202. "muls r6, r5, r6 \n\t" \
  2203. "movs %[a], #0 \n\t" \
  2204. "adds %[h], %[h], r6 \n\t" \
  2205. "adcs %[o], %[o], %[a] \n\t" \
  2206. "adds %[h], %[h], r6 \n\t" \
  2207. "adcs %[o], %[o], %[a] \n\t" \
  2208. /* ah * bl */ \
  2209. "uxth r6, %[b] \n\t" \
  2210. "muls r5, r6, r5 \n\t" \
  2211. "lsrs r6, r5, #16 \n\t" \
  2212. "lsls r5, r5, #16 \n\t" \
  2213. "adds %[l], %[l], r5 \n\t" \
  2214. "adcs %[h], %[h], r6 \n\t" \
  2215. "adcs %[o], %[o], %[a] \n\t" \
  2216. "adds %[l], %[l], r5 \n\t" \
  2217. "adcs %[h], %[h], r6 \n\t" \
  2218. "adcs %[o], %[o], %[a] \n\t" \
  2219. "movs %[a], r8 \n\t" \
  2220. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2221. : [a] "l" (va), [b] "l" (vb) \
  2222. : "r5", "r6", "r8", "cc" \
  2223. )
  2224. #endif
  2225. /* Square va and store double size result in: vh | vl.
 * vl = al*al and vh = ah*ah are computed directly; the cross product
 * al*ah is then doubled and added in via a 17-bit left shift (low word)
 * and 15-bit right shift (high word).
 */
  2226. #define SP_ASM_SQR(vl, vh, va) \
  2227. __asm__ __volatile__ ( \
  2228. "lsrs r5, %[a], #16 \n\t" \
  2229. "uxth r6, %[a] \n\t" \
  2230. "mov %[l], r6 \n\t" \
  2231. "mov %[h], r5 \n\t" \
  2232. /* al * al */ \
  2233. "muls %[l], %[l], %[l] \n\t" \
  2234. /* ah * ah */ \
  2235. "muls %[h], %[h], %[h] \n\t" \
  2236. /* 2 * al * ah */ \
  2237. "muls r6, r5, r6 \n\t" \
  2238. "lsrs r5, r6, #15 \n\t" \
  2239. "lsls r6, r6, #17 \n\t" \
  2240. "adds %[l], %[l], r6 \n\t" \
  2241. "adcs %[h], %[h], r5 \n\t" \
  2242. : [h] "+l" (vh), [l] "+l" (vl) \
  2243. : [a] "l" (va) \
  2244. : "r5", "r6", "cc" \
  2245. )
  2246. /* Square va and add double size result into: vo | vh | vl.
 * al*al and ah*ah are accumulated first (r5 supplies the zero for the
 * carry into vo); the cross product al*ah is then doubled via the
 * 17/15-bit shift pair and accumulated with carry.
 */
  2247. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  2248. __asm__ __volatile__ ( \
  2249. "lsrs r4, %[a], #16 \n\t" \
  2250. "uxth r6, %[a] \n\t" \
  2251. /* al * al */ \
  2252. "muls r6, r6, r6 \n\t" \
  2253. /* ah * ah */ \
  2254. "muls r4, r4, r4 \n\t" \
  2255. "adds %[l], %[l], r6 \n\t" \
  2256. "adcs %[h], %[h], r4 \n\t" \
  2257. "movs r5, #0 \n\t" \
  2258. "adcs %[o], %[o], r5 \n\t" \
  2259. "lsrs r4, %[a], #16 \n\t" \
  2260. "uxth r6, %[a] \n\t" \
  2261. /* 2 * al * ah */ \
  2262. "muls r6, r4, r6 \n\t" \
  2263. "lsrs r4, r6, #15 \n\t" \
  2264. "lsls r6, r6, #17 \n\t" \
  2265. "adds %[l], %[l], r6 \n\t" \
  2266. "adcs %[h], %[h], r4 \n\t" \
  2267. "adcs %[o], %[o], r5 \n\t" \
  2268. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2269. : [a] "l" (va) \
  2270. : "r4", "r5", "r6", "cc" \
  2271. )
  2272. /* Square va and add double size result into: vh | vl.
 * Same scheme as SP_ASM_SQR_ADD but with no third (overflow) word, so
 * no carry out of vh is tracked.
 */
  2273. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  2274. __asm__ __volatile__ ( \
  2275. "lsrs r5, %[a], #16 \n\t" \
  2276. "uxth r6, %[a] \n\t" \
  2277. /* al * al */ \
  2278. "muls r6, r6, r6 \n\t" \
  2279. /* ah * ah */ \
  2280. "muls r5, r5, r5 \n\t" \
  2281. "adds %[l], %[l], r6 \n\t" \
  2282. "adcs %[h], %[h], r5 \n\t" \
  2283. "lsrs r5, %[a], #16 \n\t" \
  2284. "uxth r6, %[a] \n\t" \
  2285. /* 2 * al * ah */ \
  2286. "muls r6, r5, r6 \n\t" \
  2287. "lsrs r5, r6, #15 \n\t" \
  2288. "lsls r6, r6, #17 \n\t" \
  2289. "adds %[l], %[l], r6 \n\t" \
  2290. "adcs %[h], %[h], r5 \n\t" \
  2291. : [l] "+l" (vl), [h] "+l" (vh) \
  2292. : [a] "l" (va) \
  2293. : "r5", "r6", "cc" \
  2294. )
  2295. /* Add va into: vh | vl.
 * Single-word add with carry propagated into vh (r5 supplies the zero
 * operand for the adcs).
 */
  2296. #define SP_ASM_ADDC(vl, vh, va) \
  2297. __asm__ __volatile__ ( \
  2298. "adds %[l], %[l], %[a] \n\t" \
  2299. "movs r5, #0 \n\t" \
  2300. "adcs %[h], %[h], r5 \n\t" \
  2301. : [l] "+l" (vl), [h] "+l" (vh) \
  2302. : [a] "l" (va) \
  2303. : "r5", "cc" \
  2304. )
  2305. /* Sub va from: vh | vl.
 * Single-word subtract with borrow propagated into vh (r5 supplies the
 * zero operand for the sbcs).
 */
  2306. #define SP_ASM_SUBB(vl, vh, va) \
  2307. __asm__ __volatile__ ( \
  2308. "subs %[l], %[l], %[a] \n\t" \
  2309. "movs r5, #0 \n\t" \
  2310. "sbcs %[h], %[h], r5 \n\t" \
  2311. : [l] "+l" (vl), [h] "+l" (vh) \
  2312. : [a] "l" (va) \
  2313. : "r5", "cc" \
  2314. )
  2315. /* Add two times vc | vb | va into vo | vh | vl.
 * The triple word is accumulated twice with carry chains; no scratch
 * registers are needed.
 */
  2316. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  2317. __asm__ __volatile__ ( \
  2318. "adds %[l], %[l], %[a] \n\t" \
  2319. "adcs %[h], %[h], %[b] \n\t" \
  2320. "adcs %[o], %[o], %[c] \n\t" \
  2321. "adds %[l], %[l], %[a] \n\t" \
  2322. "adcs %[h], %[h], %[b] \n\t" \
  2323. "adcs %[o], %[o], %[c] \n\t" \
  2324. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2325. : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
  2326. : "cc" \
  2327. )
  2328. #elif defined(__GNUC__)
  2329. /* Multiply va by vb and store double size result in: vh | vl.
 * Splits the operands into 16-bit halves and combines the four partial
 * products. Uses two-operand mnemonics; NOTE(review): this branch
 * appears to rely on divided (pre-UAL) Thumb syntax where these
 * mnemonics assemble to flag-setting Thumb-1 encodings - confirm
 * against the assembler in use.
 */
  2330. #define SP_ASM_MUL(vl, vh, va, vb) \
  2331. __asm__ __volatile__ ( \
  2332. /* al * bl */ \
  2333. "uxth r6, %[a] \n\t" \
  2334. "uxth %[l], %[b] \n\t" \
  2335. "mul %[l], r6 \n\t" \
  2336. /* al * bh */ \
  2337. "lsr r4, %[b], #16 \n\t" \
  2338. "mul r6, r4 \n\t" \
  2339. "lsr %[h], r6, #16 \n\t" \
  2340. "lsl r6, r6, #16 \n\t" \
  2341. "add %[l], %[l], r6 \n\t" \
  2342. "mov r5, #0 \n\t" \
  2343. "adc %[h], r5 \n\t" \
  2344. /* ah * bh */ \
  2345. "lsr r6, %[a], #16 \n\t" \
  2346. "mul r4, r6 \n\t" \
  2347. "add %[h], %[h], r4 \n\t" \
  2348. /* ah * bl */ \
  2349. "uxth r4, %[b] \n\t" \
  2350. "mul r6, r4 \n\t" \
  2351. "lsr r4, r6, #16 \n\t" \
  2352. "lsl r6, r6, #16 \n\t" \
  2353. "add %[l], %[l], r6 \n\t" \
  2354. "adc %[h], r4 \n\t" \
  2355. : [h] "+l" (vh), [l] "+l" (vl) \
  2356. : [a] "l" (va), [b] "l" (vb) \
  2357. : "r4", "r5", "r6", "cc" \
  2358. )
  2359. /* Multiply va by vb and store double size result in: vo | vh | vl.
 * Same half-word scheme as SP_ASM_MUL; vo itself is set to zero and
 * reused as the zero operand for the first carry, avoiding an extra
 * scratch register.
 */
  2360. #define SP_ASM_MUL_SET(vl, vh, vo, va, vb) \
  2361. __asm__ __volatile__ ( \
  2362. /* al * bl */ \
  2363. "uxth r6, %[a] \n\t" \
  2364. "uxth %[l], %[b] \n\t" \
  2365. "mul %[l], r6 \n\t" \
  2366. /* al * bh */ \
  2367. "lsr r5, %[b], #16 \n\t" \
  2368. "mul r6, r5 \n\t" \
  2369. "lsr %[h], r6, #16 \n\t" \
  2370. "lsl r6, r6, #16 \n\t" \
  2371. "add %[l], %[l], r6 \n\t" \
  2372. "mov %[o], #0 \n\t" \
  2373. "adc %[h], %[o] \n\t" \
  2374. /* ah * bh */ \
  2375. "lsr r6, %[a], #16 \n\t" \
  2376. "mul r5, r6 \n\t" \
  2377. "add %[h], %[h], r5 \n\t" \
  2378. /* ah * bl */ \
  2379. "uxth r5, %[b] \n\t" \
  2380. "mul r6, r5 \n\t" \
  2381. "lsr r5, r6, #16 \n\t" \
  2382. "lsl r6, r6, #16 \n\t" \
  2383. "add %[l], %[l], r6 \n\t" \
  2384. "adc %[h], r5 \n\t" \
  2385. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2386. : [a] "l" (va), [b] "l" (vb) \
  2387. : "r5", "r6", "cc" \
  2388. )
  2389. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
  2390. /* Multiply va by vb and add double size result into: vo | vh | vl.
 * Fast variant: r5 holds a constant zero throughout while r6/r7 carry
 * the 16-bit half products.
 */
  2391. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  2392. __asm__ __volatile__ ( \
  2393. /* al * bl */ \
  2394. "uxth r6, %[a] \n\t" \
  2395. "uxth r7, %[b] \n\t" \
  2396. "mul r7, r6 \n\t" \
  2397. "add %[l], %[l], r7 \n\t" \
  2398. "mov r5, #0 \n\t" \
  2399. "adc %[h], r5 \n\t" \
  2400. "adc %[o], r5 \n\t" \
  2401. /* al * bh */ \
  2402. "lsr r7, %[b], #16 \n\t" \
  2403. "mul r6, r7 \n\t" \
  2404. "lsr r7, r6, #16 \n\t" \
  2405. "lsl r6, r6, #16 \n\t" \
  2406. "add %[l], %[l], r6 \n\t" \
  2407. "adc %[h], r7 \n\t" \
  2408. "adc %[o], r5 \n\t" \
  2409. /* ah * bh */ \
  2410. "lsr r6, %[a], #16 \n\t" \
  2411. "lsr r7, %[b], #16 \n\t" \
  2412. "mul r7, r6 \n\t" \
  2413. "add %[h], %[h], r7 \n\t" \
  2414. "adc %[o], r5 \n\t" \
  2415. /* ah * bl */ \
  2416. "uxth r7, %[b] \n\t" \
  2417. "mul r6, r7 \n\t" \
  2418. "lsr r7, r6, #16 \n\t" \
  2419. "lsl r6, r6, #16 \n\t" \
  2420. "add %[l], %[l], r6 \n\t" \
  2421. "adc %[h], r7 \n\t" \
  2422. "adc %[o], r5 \n\t" \
  2423. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2424. : [a] "l" (va), [b] "l" (vb) \
  2425. : "r5", "r6", "r7", "cc" \
  2426. )
  2427. #else
  2428. /* Multiply va by vb and add double size result into: vo | vh | vl.
 * Two-scratch variant (r7 not available): r5 is reused both as product
 * scratch and as the zero operand, reloaded with #0 before each
 * carry-propagating adc. NOTE(review): this relies on the Thumb MOV
 * immediate leaving the carry flag untouched - confirm for the target
 * assembler.
 */
  2429. #define SP_ASM_MUL_ADD(vl, vh, vo, va, vb) \
  2430. __asm__ __volatile__ ( \
  2431. /* al * bl */ \
  2432. "uxth r6, %[a] \n\t" \
  2433. "uxth r5, %[b] \n\t" \
  2434. "mul r5, r6 \n\t" \
  2435. "add %[l], %[l], r5 \n\t" \
  2436. "mov r5, #0 \n\t" \
  2437. "adc %[h], r5 \n\t" \
  2438. "adc %[o], r5 \n\t" \
  2439. /* al * bh */ \
  2440. "lsr r5, %[b], #16 \n\t" \
  2441. "mul r6, r5 \n\t" \
  2442. "lsr r5, r6, #16 \n\t" \
  2443. "lsl r6, r6, #16 \n\t" \
  2444. "add %[l], %[l], r6 \n\t" \
  2445. "adc %[h], r5 \n\t" \
  2446. "mov r5, #0 \n\t" \
  2447. "adc %[o], r5 \n\t" \
  2448. /* ah * bh */ \
  2449. "lsr r6, %[a], #16 \n\t" \
  2450. "lsr r5, %[b], #16 \n\t" \
  2451. "mul r5, r6 \n\t" \
  2452. "add %[h], %[h], r5 \n\t" \
  2453. "mov r5, #0 \n\t" \
  2454. "adc %[o], r5 \n\t" \
  2455. /* ah * bl */ \
  2456. "uxth r5, %[b] \n\t" \
  2457. "mul r6, r5 \n\t" \
  2458. "lsr r5, r6, #16 \n\t" \
  2459. "lsl r6, r6, #16 \n\t" \
  2460. "add %[l], %[l], r6 \n\t" \
  2461. "adc %[h], r5 \n\t" \
  2462. "mov r5, #0 \n\t" \
  2463. "adc %[o], r5 \n\t" \
  2464. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2465. : [a] "l" (va), [b] "l" (vb) \
  2466. : "r5", "r6", "cc" \
  2467. )
  2468. #endif
  2469. /* Multiply va by vb and add double size result into: vh | vl.
 * No third (overflow) word: carries out of vh are not tracked. r4/r6
 * carry the half products; r5 only supplies the zero for the first adc.
 */
  2470. #define SP_ASM_MUL_ADD_NO(vl, vh, va, vb) \
  2471. __asm__ __volatile__ ( \
  2472. /* al * bl */ \
  2473. "uxth r6, %[a] \n\t" \
  2474. "uxth r4, %[b] \n\t" \
  2475. "mul r4, r6 \n\t" \
  2476. "add %[l], %[l], r4 \n\t" \
  2477. "mov r5, #0 \n\t" \
  2478. "adc %[h], r5 \n\t" \
  2479. /* al * bh */ \
  2480. "lsr r4, %[b], #16 \n\t" \
  2481. "mul r6, r4 \n\t" \
  2482. "lsr r4, r6, #16 \n\t" \
  2483. "lsl r6, r6, #16 \n\t" \
  2484. "add %[l], %[l], r6 \n\t" \
  2485. "adc %[h], r4 \n\t" \
  2486. /* ah * bh */ \
  2487. "lsr r6, %[a], #16 \n\t" \
  2488. "lsr r4, %[b], #16 \n\t" \
  2489. "mul r4, r6 \n\t" \
  2490. "add %[h], %[h], r4 \n\t" \
  2491. /* ah * bl */ \
  2492. "uxth r4, %[b] \n\t" \
  2493. "mul r6, r4 \n\t" \
  2494. "lsr r4, r6, #16 \n\t" \
  2495. "lsl r6, r6, #16 \n\t" \
  2496. "add %[l], %[l], r6 \n\t" \
  2497. "adc %[h], r4 \n\t" \
  2498. : [l] "+l" (vl), [h] "+l" (vh) \
  2499. : [a] "l" (va), [b] "l" (vb) \
  2500. : "r4", "r5", "r6", "cc" \
  2501. )
  2502. #if !defined(WOLFSSL_SP_SMALL) && !defined(DEBUG)
  2503. /* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * Fast variant: each of the four half products is accumulated twice
 * with full carry chains. r5 holds a constant zero throughout; r6/r7
 * carry the partial products.
 */
  2504. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  2505. __asm__ __volatile__ ( \
  2506. /* al * bl */ \
  2507. "uxth r6, %[a] \n\t" \
  2508. "uxth r7, %[b] \n\t" \
  2509. "mul r7, r6 \n\t" \
  2510. "add %[l], %[l], r7 \n\t" \
  2511. "mov r5, #0 \n\t" \
  2512. "adc %[h], r5 \n\t" \
  2513. "adc %[o], r5 \n\t" \
  2514. "add %[l], %[l], r7 \n\t" \
  2515. "adc %[h], r5 \n\t" \
  2516. "adc %[o], r5 \n\t" \
  2517. /* al * bh */ \
  2518. "lsr r7, %[b], #16 \n\t" \
  2519. "mul r6, r7 \n\t" \
  2520. "lsr r7, r6, #16 \n\t" \
  2521. "lsl r6, r6, #16 \n\t" \
  2522. "add %[l], %[l], r6 \n\t" \
  2523. "adc %[h], r7 \n\t" \
  2524. "adc %[o], r5 \n\t" \
  2525. "add %[l], %[l], r6 \n\t" \
  2526. "adc %[h], r7 \n\t" \
  2527. "adc %[o], r5 \n\t" \
  2528. /* ah * bh */ \
  2529. "lsr r6, %[a], #16 \n\t" \
  2530. "lsr r7, %[b], #16 \n\t" \
  2531. "mul r7, r6 \n\t" \
  2532. "add %[h], %[h], r7 \n\t" \
  2533. "adc %[o], r5 \n\t" \
  2534. "add %[h], %[h], r7 \n\t" \
  2535. "adc %[o], r5 \n\t" \
  2536. /* ah * bl */ \
  2537. "uxth r7, %[b] \n\t" \
  2538. "mul r6, r7 \n\t" \
  2539. "lsr r7, r6, #16 \n\t" \
  2540. "lsl r6, r6, #16 \n\t" \
  2541. "add %[l], %[l], r6 \n\t" \
  2542. "adc %[h], r7 \n\t" \
  2543. "adc %[o], r5 \n\t" \
  2544. "add %[l], %[l], r6 \n\t" \
  2545. "adc %[h], r7 \n\t" \
  2546. "adc %[o], r5 \n\t" \
  2547. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2548. : [a] "l" (va), [b] "l" (vb) \
  2549. : "r5", "r6", "r7", "cc" \
  2550. )
  2551. #else
  2552. /* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * Two-scratch variant (r7 not available): va is parked in the high
 * register r8 so %[a] can double as the constant zero, and %[a] is
 * restored from r8 before the asm ends.
 * NOTE(review): %[a] is an input operand written and then restored
 * inside the asm - confirm this is acceptable for the targeted
 * toolchains.
 */
  2553. #define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb) \
  2554. __asm__ __volatile__ ( \
  2555. "mov r8, %[a] \n\t" \
  2556. /* al * bl */ \
  2557. "uxth r6, %[a] \n\t" \
  2558. "uxth r5, %[b] \n\t" \
  2559. "mul r5, r6 \n\t" \
  2560. "add %[l], %[l], r5 \n\t" \
  2561. "mov %[a], #0 \n\t" \
  2562. "adc %[h], %[a] \n\t" \
  2563. "adc %[o], %[a] \n\t" \
  2564. "add %[l], %[l], r5 \n\t" \
  2565. "adc %[h], %[a] \n\t" \
  2566. "adc %[o], %[a] \n\t" \
  2567. /* al * bh */ \
  2568. "lsr r5, %[b], #16 \n\t" \
  2569. "mul r6, r5 \n\t" \
  2570. "lsr r5, r6, #16 \n\t" \
  2571. "lsl r6, r6, #16 \n\t" \
  2572. "add %[l], %[l], r6 \n\t" \
  2573. "adc %[h], r5 \n\t" \
  2574. "adc %[o], %[a] \n\t" \
  2575. "add %[l], %[l], r6 \n\t" \
  2576. "adc %[h], r5 \n\t" \
  2577. "adc %[o], %[a] \n\t" \
  2578. /* ah * bh */ \
  2579. "mov %[a], r8 \n\t" \
  2580. "lsr r6, %[a], #16 \n\t" \
  2581. "lsr r5, %[b], #16 \n\t" \
  2582. "mul r5, r6 \n\t" \
  2583. "add %[h], %[h], r5 \n\t" \
  2584. "mov %[a], #0 \n\t" \
  2585. "adc %[o], %[a] \n\t" \
  2586. "add %[h], %[h], r5 \n\t" \
  2587. "adc %[o], %[a] \n\t" \
  2588. /* ah * bl */ \
  2589. "uxth r5, %[b] \n\t" \
  2590. "mul r6, r5 \n\t" \
  2591. "lsr r5, r6, #16 \n\t" \
  2592. "lsl r6, r6, #16 \n\t" \
  2593. "add %[l], %[l], r6 \n\t" \
  2594. "adc %[h], r5 \n\t" \
  2595. "adc %[o], %[a] \n\t" \
  2596. "add %[l], %[l], r6 \n\t" \
  2597. "adc %[h], r5 \n\t" \
  2598. "adc %[o], %[a] \n\t" \
  2599. "mov %[a], r8 \n\t" \
  2600. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2601. : [a] "l" (va), [b] "l" (vb) \
  2602. : "r5", "r6", "r8", "cc" \
  2603. )
  2604. #endif
  2605. #ifndef DEBUG
  2606. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  2607. * Assumes first add will not overflow vh | vl
 * (so the al*bl accumulation can skip the carry into vo).
 * r5 holds a constant zero throughout; r6/r7 carry the half products.
  2608. */
  2609. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  2610. __asm__ __volatile__ ( \
  2611. /* al * bl */ \
  2612. "uxth r6, %[a] \n\t" \
  2613. "uxth r7, %[b] \n\t" \
  2614. "mul r7, r6 \n\t" \
  2615. "add %[l], %[l], r7 \n\t" \
  2616. "mov r5, #0 \n\t" \
  2617. "adc %[h], r5 \n\t" \
  2618. "add %[l], %[l], r7 \n\t" \
  2619. "adc %[h], r5 \n\t" \
  2620. /* al * bh */ \
  2621. "lsr r7, %[b], #16 \n\t" \
  2622. "mul r6, r7 \n\t" \
  2623. "lsr r7, r6, #16 \n\t" \
  2624. "lsl r6, r6, #16 \n\t" \
  2625. "add %[l], %[l], r6 \n\t" \
  2626. "adc %[h], r7 \n\t" \
  2627. "add %[l], %[l], r6 \n\t" \
  2628. "adc %[h], r7 \n\t" \
  2629. "adc %[o], r5 \n\t" \
  2630. /* ah * bh */ \
  2631. "lsr r6, %[a], #16 \n\t" \
  2632. "lsr r7, %[b], #16 \n\t" \
  2633. "mul r7, r6 \n\t" \
  2634. "add %[h], %[h], r7 \n\t" \
  2635. "adc %[o], r5 \n\t" \
  2636. "add %[h], %[h], r7 \n\t" \
  2637. "adc %[o], r5 \n\t" \
  2638. /* ah * bl */ \
  2639. "uxth r7, %[b] \n\t" \
  2640. "mul r6, r7 \n\t" \
  2641. "lsr r7, r6, #16 \n\t" \
  2642. "lsl r6, r6, #16 \n\t" \
  2643. "add %[l], %[l], r6 \n\t" \
  2644. "adc %[h], r7 \n\t" \
  2645. "adc %[o], r5 \n\t" \
  2646. "add %[l], %[l], r6 \n\t" \
  2647. "adc %[h], r7 \n\t" \
  2648. "adc %[o], r5 \n\t" \
  2649. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2650. : [a] "l" (va), [b] "l" (vb) \
  2651. : "r5", "r6", "r7", "cc" \
  2652. )
  2653. #else
  2654. /* Multiply va by vb and add double size result twice into: vo | vh | vl
  2655. * Assumes first add will not overflow vh | vl
 * (so the al*bl accumulation can skip the carry into vo).
 * Two-scratch variant (r7 not available): va is parked in the high
 * register r8 so %[a] can double as the constant zero, restored before
 * the asm ends.
 * NOTE(review): %[a] is an input operand written and then restored
 * inside the asm - confirm this is acceptable for the targeted
 * toolchains.
  2656. */
  2657. #define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb) \
  2658. __asm__ __volatile__ ( \
  2659. "mov r8, %[a] \n\t" \
  2660. /* al * bl */ \
  2661. "uxth r5, %[a] \n\t" \
  2662. "uxth r6, %[b] \n\t" \
  2663. "mul r6, r5 \n\t" \
  2664. "add %[l], %[l], r6 \n\t" \
  2665. "mov %[a], #0 \n\t" \
  2666. "adc %[h], %[a] \n\t" \
  2667. "add %[l], %[l], r6 \n\t" \
  2668. "adc %[h], %[a] \n\t" \
  2669. /* al * bh */ \
  2670. "lsr r6, %[b], #16 \n\t" \
  2671. "mul r5, r6 \n\t" \
  2672. "lsr r6, r5, #16 \n\t" \
  2673. "lsl r5, r5, #16 \n\t" \
  2674. "add %[l], %[l], r5 \n\t" \
  2675. "adc %[h], r6 \n\t" \
  2676. "add %[l], %[l], r5 \n\t" \
  2677. "adc %[h], r6 \n\t" \
  2678. "adc %[o], %[a] \n\t" \
  2679. /* ah * bh */ \
  2680. "mov %[a], r8 \n\t" \
  2681. "lsr r5, %[a], #16 \n\t" \
  2682. "lsr r6, %[b], #16 \n\t" \
  2683. "mul r6, r5 \n\t" \
  2684. "mov %[a], #0 \n\t" \
  2685. "add %[h], %[h], r6 \n\t" \
  2686. "adc %[o], %[a] \n\t" \
  2687. "add %[h], %[h], r6 \n\t" \
  2688. "adc %[o], %[a] \n\t" \
  2689. /* ah * bl */ \
  2690. "uxth r6, %[b] \n\t" \
  2691. "mul r5, r6 \n\t" \
  2692. "lsr r6, r5, #16 \n\t" \
  2693. "lsl r5, r5, #16 \n\t" \
  2694. "add %[l], %[l], r5 \n\t" \
  2695. "adc %[h], r6 \n\t" \
  2696. "adc %[o], %[a] \n\t" \
  2697. "add %[l], %[l], r5 \n\t" \
  2698. "adc %[h], r6 \n\t" \
  2699. "adc %[o], %[a] \n\t" \
  2700. "mov %[a], r8 \n\t" \
  2701. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2702. : [a] "l" (va), [b] "l" (vb) \
  2703. : "r5", "r6", "r8", "cc" \
  2704. )
  2705. #endif
  2706. /* Square va and store double size result in: vh | vl.
 * vl = al*al and vh = ah*ah are computed directly; the cross product
 * al*ah is then doubled and added in via a 17-bit left shift (low word)
 * and 15-bit right shift (high word).
 */
  2707. #define SP_ASM_SQR(vl, vh, va) \
  2708. __asm__ __volatile__ ( \
  2709. "lsr r5, %[a], #16 \n\t" \
  2710. "uxth r6, %[a] \n\t" \
  2711. "mov %[l], r6 \n\t" \
  2712. "mov %[h], r5 \n\t" \
  2713. /* al * al */ \
  2714. "mul %[l], %[l] \n\t" \
  2715. /* ah * ah */ \
  2716. "mul %[h], %[h] \n\t" \
  2717. /* 2 * al * ah */ \
  2718. "mul r6, r5 \n\t" \
  2719. "lsr r5, r6, #15 \n\t" \
  2720. "lsl r6, r6, #17 \n\t" \
  2721. "add %[l], %[l], r6 \n\t" \
  2722. "adc %[h], r5 \n\t" \
  2723. : [h] "+l" (vh), [l] "+l" (vl) \
  2724. : [a] "l" (va) \
  2725. : "r5", "r6", "cc" \
  2726. )
  2727. /* Square va and add double size result into: vo | vh | vl.
 * al*al and ah*ah are accumulated first (r5 supplies the zero for the
 * carry into vo); the cross product al*ah is then doubled via the
 * 17/15-bit shift pair and accumulated with carry.
 */
  2728. #define SP_ASM_SQR_ADD(vl, vh, vo, va) \
  2729. __asm__ __volatile__ ( \
  2730. "lsr r4, %[a], #16 \n\t" \
  2731. "uxth r6, %[a] \n\t" \
  2732. /* al * al */ \
  2733. "mul r6, r6 \n\t" \
  2734. /* ah * ah */ \
  2735. "mul r4, r4 \n\t" \
  2736. "add %[l], %[l], r6 \n\t" \
  2737. "adc %[h], r4 \n\t" \
  2738. "mov r5, #0 \n\t" \
  2739. "adc %[o], r5 \n\t" \
  2740. "lsr r4, %[a], #16 \n\t" \
  2741. "uxth r6, %[a] \n\t" \
  2742. /* 2 * al * ah */ \
  2743. "mul r6, r4 \n\t" \
  2744. "lsr r4, r6, #15 \n\t" \
  2745. "lsl r6, r6, #17 \n\t" \
  2746. "add %[l], %[l], r6 \n\t" \
  2747. "adc %[h], r4 \n\t" \
  2748. "adc %[o], r5 \n\t" \
  2749. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2750. : [a] "l" (va) \
  2751. : "r4", "r5", "r6", "cc" \
  2752. )
  2753. /* Square va and add double size result into: vh | vl.
 * Same scheme as SP_ASM_SQR_ADD but with no third (overflow) word, so
 * no carry out of vh is tracked.
 */
  2754. #define SP_ASM_SQR_ADD_NO(vl, vh, va) \
  2755. __asm__ __volatile__ ( \
  2756. "lsr r5, %[a], #16 \n\t" \
  2757. "uxth r6, %[a] \n\t" \
  2758. /* al * al */ \
  2759. "mul r6, r6 \n\t" \
  2760. /* ah * ah */ \
  2761. "mul r5, r5 \n\t" \
  2762. "add %[l], %[l], r6 \n\t" \
  2763. "adc %[h], r5 \n\t" \
  2764. "lsr r5, %[a], #16 \n\t" \
  2765. "uxth r6, %[a] \n\t" \
  2766. /* 2 * al * ah */ \
  2767. "mul r6, r5 \n\t" \
  2768. "lsr r5, r6, #15 \n\t" \
  2769. "lsl r6, r6, #17 \n\t" \
  2770. "add %[l], %[l], r6 \n\t" \
  2771. "adc %[h], r5 \n\t" \
  2772. : [l] "+l" (vl), [h] "+l" (vh) \
  2773. : [a] "l" (va) \
  2774. : "r5", "r6", "cc" \
  2775. )
  2776. /* Add va into: vh | vl.
 * Single-word add with carry propagated into vh (r5 supplies the zero
 * operand for the adc).
 */
  2777. #define SP_ASM_ADDC(vl, vh, va) \
  2778. __asm__ __volatile__ ( \
  2779. "add %[l], %[l], %[a] \n\t" \
  2780. "mov r5, #0 \n\t" \
  2781. "adc %[h], r5 \n\t" \
  2782. : [l] "+l" (vl), [h] "+l" (vh) \
  2783. : [a] "l" (va) \
  2784. : "r5", "cc" \
  2785. )
  2786. /* Sub va from: vh | vl.
 * Single-word subtract with borrow propagated into vh (r5 supplies the
 * zero operand for the sbc).
 */
  2787. #define SP_ASM_SUBB(vl, vh, va) \
  2788. __asm__ __volatile__ ( \
  2789. "sub %[l], %[l], %[a] \n\t" \
  2790. "mov r5, #0 \n\t" \
  2791. "sbc %[h], r5 \n\t" \
  2792. : [l] "+l" (vl), [h] "+l" (vh) \
  2793. : [a] "l" (va) \
  2794. : "r5", "cc" \
  2795. )
  2796. /* Add two times vc | vb | va into vo | vh | vl.
 * The triple word is accumulated twice with carry chains; no scratch
 * registers are needed.
 */
  2797. #define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc) \
  2798. __asm__ __volatile__ ( \
  2799. "add %[l], %[l], %[a] \n\t" \
  2800. "adc %[h], %[b] \n\t" \
  2801. "adc %[o], %[c] \n\t" \
  2802. "add %[l], %[l], %[a] \n\t" \
  2803. "adc %[h], %[b] \n\t" \
  2804. "adc %[o], %[c] \n\t" \
  2805. : [l] "+l" (vl), [h] "+l" (vh), [o] "+l" (vo) \
  2806. : [a] "l" (va), [b] "l" (vb), [c] "l" (vc) \
  2807. : "cc" \
  2808. )
  2809. #endif
  2810. #ifdef WOLFSSL_SP_DIV_WORD_HALF
  2811. /* Divide a two digit number by a digit number and return. (hi | lo) / d
  2812. *
  2813. * No division instruction used - does operation bit by bit.
  2814. * Constant time.
  2815. *
  2816. * @param [in] hi SP integer digit. High digit of the dividend.
  2817. * @param [in] lo SP integer digit. Lower digit of the dividend.
  2818. * @param [in] d SP integer digit. Number to divide by.
  2819. * @return The division result.
  2820. */
  2821. static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
  2822. sp_int_digit d)
  2823. {
  2824. __asm__ __volatile__ (
  2825. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2826. "lsrs r3, %[d], #24\n\t"
  2827. #else
  2828. "lsr r3, %[d], #24\n\t"
  2829. #endif
  2830. "beq 2%=f\n\t"
  2831. "\n1%=:\n\t"
  2832. "movs r3, #0\n\t"
  2833. "b 3%=f\n\t"
  2834. "\n2%=:\n\t"
  2835. "mov r3, #8\n\t"
  2836. "\n3%=:\n\t"
  2837. "movs r4, #31\n\t"
  2838. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2839. "subs r4, r4, r3\n\t"
  2840. #else
  2841. "sub r4, r4, r3\n\t"
  2842. #endif
  2843. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2844. "lsls %[d], %[d], r3\n\t"
  2845. #else
  2846. "lsl %[d], %[d], r3\n\t"
  2847. #endif
  2848. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2849. "lsls %[hi], %[hi], r3\n\t"
  2850. #else
  2851. "lsl %[hi], %[hi], r3\n\t"
  2852. #endif
  2853. "mov r5, %[lo]\n\t"
  2854. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2855. "lsrs r5, r5, r4\n\t"
  2856. #else
  2857. "lsr r5, r5, r4\n\t"
  2858. #endif
  2859. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2860. "lsls %[lo], %[lo], r3\n\t"
  2861. #else
  2862. "lsl %[lo], %[lo], r3\n\t"
  2863. #endif
  2864. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2865. "lsrs r5, r5, #1\n\t"
  2866. #else
  2867. "lsr r5, r5, #1\n\t"
  2868. #endif
  2869. #if defined(WOLFSSL_KEIL)
  2870. "orrs %[hi], %[hi], r5\n\t"
  2871. #elif defined(__clang__)
  2872. "orrs %[hi], r5\n\t"
  2873. #else
  2874. "orr %[hi], r5\n\t"
  2875. #endif
  2876. "movs r3, #0\n\t"
  2877. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2878. "lsrs r5, %[d], #1\n\t"
  2879. #else
  2880. "lsr r5, %[d], #1\n\t"
  2881. #endif
  2882. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2883. "adds r5, r5, #1\n\t"
  2884. #else
  2885. "add r5, r5, #1\n\t"
  2886. #endif
  2887. "mov r8, %[lo]\n\t"
  2888. "mov r9, %[hi]\n\t"
  2889. /* Do top 32 */
  2890. "movs r6, r5\n\t"
  2891. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2892. "subs r6, r6, %[hi]\n\t"
  2893. #else
  2894. "sub r6, r6, %[hi]\n\t"
  2895. #endif
  2896. #ifdef WOLFSSL_KEIL
  2897. "sbcs r6, r6, r6\n\t"
  2898. #elif defined(__clang__)
  2899. "sbcs r6, r6\n\t"
  2900. #else
  2901. "sbc r6, r6\n\t"
  2902. #endif
  2903. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2904. "adds r3, r3, r3\n\t"
  2905. #else
  2906. "add r3, r3, r3\n\t"
  2907. #endif
  2908. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2909. "subs r3, r3, r6\n\t"
  2910. #else
  2911. "sub r3, r3, r6\n\t"
  2912. #endif
  2913. #ifdef WOLFSSL_KEIL
  2914. "ands r6, r6, r5\n\t"
  2915. #elif defined(__clang__)
  2916. "ands r6, r5\n\t"
  2917. #else
  2918. "and r6, r5\n\t"
  2919. #endif
  2920. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2921. "subs %[hi], %[hi], r6\n\t"
  2922. #else
  2923. "sub %[hi], %[hi], r6\n\t"
  2924. #endif
  2925. "movs r4, #29\n\t"
  2926. "\n"
  2927. "L_sp_div_word_loop%=:\n\t"
  2928. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2929. "lsls %[lo], %[lo], #1\n\t"
  2930. #else
  2931. "lsl %[lo], %[lo], #1\n\t"
  2932. #endif
  2933. #ifdef WOLFSSL_KEIL
  2934. "adcs %[hi], %[hi], %[hi]\n\t"
  2935. #elif defined(__clang__)
  2936. "adcs %[hi], %[hi]\n\t"
  2937. #else
  2938. "adc %[hi], %[hi]\n\t"
  2939. #endif
  2940. "movs r6, r5\n\t"
  2941. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2942. "subs r6, r6, %[hi]\n\t"
  2943. #else
  2944. "sub r6, r6, %[hi]\n\t"
  2945. #endif
  2946. #ifdef WOLFSSL_KEIL
  2947. "sbcs r6, r6, r6\n\t"
  2948. #elif defined(__clang__)
  2949. "sbcs r6, r6\n\t"
  2950. #else
  2951. "sbc r6, r6\n\t"
  2952. #endif
  2953. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2954. "adds r3, r3, r3\n\t"
  2955. #else
  2956. "add r3, r3, r3\n\t"
  2957. #endif
  2958. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2959. "subs r3, r3, r6\n\t"
  2960. #else
  2961. "sub r3, r3, r6\n\t"
  2962. #endif
  2963. #ifdef WOLFSSL_KEIL
  2964. "ands r6, r6, r5\n\t"
  2965. #elif defined(__clang__)
  2966. "ands r6, r5\n\t"
  2967. #else
  2968. "and r6, r5\n\t"
  2969. #endif
  2970. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2971. "subs %[hi], %[hi], r6\n\t"
  2972. #else
  2973. "sub %[hi], %[hi], r6\n\t"
  2974. #endif
  2975. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2976. "subs r4, r4, #1\n\t"
  2977. #else
  2978. "sub r4, r4, #1\n\t"
  2979. #endif
  2980. "bpl L_sp_div_word_loop%=\n\t"
  2981. "movs r7, #0\n\t"
  2982. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2983. "adds r3, r3, r3\n\t"
  2984. #else
  2985. "add r3, r3, r3\n\t"
  2986. #endif
  2987. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  2988. "adds r3, r3, #1\n\t"
  2989. #else
  2990. "add r3, r3, #1\n\t"
  2991. #endif
  2992. /* r * d - Start */
  2993. "uxth %[hi], r3\n\t"
  2994. "uxth r4, %[d]\n\t"
  2995. #ifdef WOLFSSL_KEIL
  2996. "muls r4, %[hi], r4\n\t"
  2997. #elif defined(__clang__)
  2998. "muls r4, %[hi]\n\t"
  2999. #else
  3000. "mul r4, %[hi]\n\t"
  3001. #endif
  3002. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3003. "lsrs r6, %[d], #16\n\t"
  3004. #else
  3005. "lsr r6, %[d], #16\n\t"
  3006. #endif
  3007. #ifdef WOLFSSL_KEIL
  3008. "muls %[hi], r6, %[hi]\n\t"
  3009. #elif defined(__clang__)
  3010. "muls %[hi], r6\n\t"
  3011. #else
  3012. "mul %[hi], r6\n\t"
  3013. #endif
  3014. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3015. "lsrs r5, %[hi], #16\n\t"
  3016. #else
  3017. "lsr r5, %[hi], #16\n\t"
  3018. #endif
  3019. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3020. "lsls %[hi], %[hi], #16\n\t"
  3021. #else
  3022. "lsl %[hi], %[hi], #16\n\t"
  3023. #endif
  3024. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3025. "adds r4, r4, %[hi]\n\t"
  3026. #else
  3027. "add r4, r4, %[hi]\n\t"
  3028. #endif
  3029. #ifdef WOLFSSL_KEIL
  3030. "adcs r5, r5, r7\n\t"
  3031. #elif defined(__clang__)
  3032. "adcs r5, r7\n\t"
  3033. #else
  3034. "adc r5, r7\n\t"
  3035. #endif
  3036. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3037. "lsrs %[hi], r3, #16\n\t"
  3038. #else
  3039. "lsr %[hi], r3, #16\n\t"
  3040. #endif
  3041. #ifdef WOLFSSL_KEIL
  3042. "muls r6, %[hi], r6\n\t"
  3043. #elif defined(__clang__)
  3044. "muls r6, %[hi]\n\t"
  3045. #else
  3046. "mul r6, %[hi]\n\t"
  3047. #endif
  3048. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3049. "adds r5, r5, r6\n\t"
  3050. #else
  3051. "add r5, r5, r6\n\t"
  3052. #endif
  3053. "uxth r6, %[d]\n\t"
  3054. #ifdef WOLFSSL_KEIL
  3055. "muls %[hi], r6, %[hi]\n\t"
  3056. #elif defined(__clang__)
  3057. "muls %[hi], r6\n\t"
  3058. #else
  3059. "mul %[hi], r6\n\t"
  3060. #endif
  3061. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3062. "lsrs r6, %[hi], #16\n\t"
  3063. #else
  3064. "lsr r6, %[hi], #16\n\t"
  3065. #endif
  3066. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3067. "lsls %[hi], %[hi], #16\n\t"
  3068. #else
  3069. "lsl %[hi], %[hi], #16\n\t"
  3070. #endif
  3071. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3072. "adds r4, r4, %[hi]\n\t"
  3073. #else
  3074. "add r4, r4, %[hi]\n\t"
  3075. #endif
  3076. #ifdef WOLFSSL_KEIL
  3077. "adcs r5, r5, r6\n\t"
  3078. #elif defined(__clang__)
  3079. "adcs r5, r6\n\t"
  3080. #else
  3081. "adc r5, r6\n\t"
  3082. #endif
  3083. /* r * d - Done */
  3084. "mov %[hi], r8\n\t"
  3085. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3086. "subs %[hi], %[hi], r4\n\t"
  3087. #else
  3088. "sub %[hi], %[hi], r4\n\t"
  3089. #endif
  3090. "movs r4, %[hi]\n\t"
  3091. "mov %[hi], r9\n\t"
  3092. #ifdef WOLFSSL_KEIL
  3093. "sbcs %[hi], %[hi], r5\n\t"
  3094. #elif defined(__clang__)
  3095. "sbcs %[hi], r5\n\t"
  3096. #else
  3097. "sbc %[hi], r5\n\t"
  3098. #endif
  3099. "movs r5, %[hi]\n\t"
  3100. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3101. "adds r3, r3, r5\n\t"
  3102. #else
  3103. "add r3, r3, r5\n\t"
  3104. #endif
  3105. /* r * d - Start */
  3106. "uxth %[hi], r3\n\t"
  3107. "uxth r4, %[d]\n\t"
  3108. #ifdef WOLFSSL_KEIL
  3109. "muls r4, %[hi], r4\n\t"
  3110. #elif defined(__clang__)
  3111. "muls r4, %[hi]\n\t"
  3112. #else
  3113. "mul r4, %[hi]\n\t"
  3114. #endif
  3115. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3116. "lsrs r6, %[d], #16\n\t"
  3117. #else
  3118. "lsr r6, %[d], #16\n\t"
  3119. #endif
  3120. #ifdef WOLFSSL_KEIL
  3121. "muls %[hi], r6, %[hi]\n\t"
  3122. #elif defined(__clang__)
  3123. "muls %[hi], r6\n\t"
  3124. #else
  3125. "mul %[hi], r6\n\t"
  3126. #endif
  3127. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3128. "lsrs r5, %[hi], #16\n\t"
  3129. #else
  3130. "lsr r5, %[hi], #16\n\t"
  3131. #endif
  3132. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3133. "lsls %[hi], %[hi], #16\n\t"
  3134. #else
  3135. "lsl %[hi], %[hi], #16\n\t"
  3136. #endif
  3137. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3138. "adds r4, r4, %[hi]\n\t"
  3139. #else
  3140. "add r4, r4, %[hi]\n\t"
  3141. #endif
  3142. #ifdef WOLFSSL_KEIL
  3143. "adcs r5, r5, r7\n\t"
  3144. #elif defined(__clang__)
  3145. "adcs r5, r7\n\t"
  3146. #else
  3147. "adc r5, r7\n\t"
  3148. #endif
  3149. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3150. "lsrs %[hi], r3, #16\n\t"
  3151. #else
  3152. "lsr %[hi], r3, #16\n\t"
  3153. #endif
  3154. #ifdef WOLFSSL_KEIL
  3155. "muls r6, %[hi], r6\n\t"
  3156. #elif defined(__clang__)
  3157. "muls r6, %[hi]\n\t"
  3158. #else
  3159. "mul r6, %[hi]\n\t"
  3160. #endif
  3161. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3162. "adds r5, r5, r6\n\t"
  3163. #else
  3164. "add r5, r5, r6\n\t"
  3165. #endif
  3166. "uxth r6, %[d]\n\t"
  3167. #ifdef WOLFSSL_KEIL
  3168. "muls %[hi], r6, %[hi]\n\t"
  3169. #elif defined(__clang__)
  3170. "muls %[hi], r6\n\t"
  3171. #else
  3172. "mul %[hi], r6\n\t"
  3173. #endif
  3174. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3175. "lsrs r6, %[hi], #16\n\t"
  3176. #else
  3177. "lsr r6, %[hi], #16\n\t"
  3178. #endif
  3179. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3180. "lsls %[hi], %[hi], #16\n\t"
  3181. #else
  3182. "lsl %[hi], %[hi], #16\n\t"
  3183. #endif
  3184. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3185. "adds r4, r4, %[hi]\n\t"
  3186. #else
  3187. "add r4, r4, %[hi]\n\t"
  3188. #endif
  3189. #ifdef WOLFSSL_KEIL
  3190. "adcs r5, r5, r6\n\t"
  3191. #elif defined(__clang__)
  3192. "adcs r5, r6\n\t"
  3193. #else
  3194. "adc r5, r6\n\t"
  3195. #endif
  3196. /* r * d - Done */
  3197. "mov %[hi], r8\n\t"
  3198. "mov r6, r9\n\t"
  3199. #ifdef WOLFSSL_KEIL
  3200. "subs r4, %[hi], r4\n\t"
  3201. #else
  3202. #ifdef __clang__
  3203. "subs r4, %[hi], r4\n\t"
  3204. #else
  3205. "sub r4, %[hi], r4\n\t"
  3206. #endif
  3207. #endif
  3208. #ifdef WOLFSSL_KEIL
  3209. "sbcs r6, r6, r5\n\t"
  3210. #elif defined(__clang__)
  3211. "sbcs r6, r5\n\t"
  3212. #else
  3213. "sbc r6, r5\n\t"
  3214. #endif
  3215. "movs r5, r6\n\t"
  3216. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3217. "adds r3, r3, r5\n\t"
  3218. #else
  3219. "add r3, r3, r5\n\t"
  3220. #endif
  3221. /* r * d - Start */
  3222. "uxth %[hi], r3\n\t"
  3223. "uxth r4, %[d]\n\t"
  3224. #ifdef WOLFSSL_KEIL
  3225. "muls r4, %[hi], r4\n\t"
  3226. #elif defined(__clang__)
  3227. "muls r4, %[hi]\n\t"
  3228. #else
  3229. "mul r4, %[hi]\n\t"
  3230. #endif
  3231. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3232. "lsrs r6, %[d], #16\n\t"
  3233. #else
  3234. "lsr r6, %[d], #16\n\t"
  3235. #endif
  3236. #ifdef WOLFSSL_KEIL
  3237. "muls %[hi], r6, %[hi]\n\t"
  3238. #elif defined(__clang__)
  3239. "muls %[hi], r6\n\t"
  3240. #else
  3241. "mul %[hi], r6\n\t"
  3242. #endif
  3243. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3244. "lsrs r5, %[hi], #16\n\t"
  3245. #else
  3246. "lsr r5, %[hi], #16\n\t"
  3247. #endif
  3248. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3249. "lsls %[hi], %[hi], #16\n\t"
  3250. #else
  3251. "lsl %[hi], %[hi], #16\n\t"
  3252. #endif
  3253. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3254. "adds r4, r4, %[hi]\n\t"
  3255. #else
  3256. "add r4, r4, %[hi]\n\t"
  3257. #endif
  3258. #ifdef WOLFSSL_KEIL
  3259. "adcs r5, r5, r7\n\t"
  3260. #elif defined(__clang__)
  3261. "adcs r5, r7\n\t"
  3262. #else
  3263. "adc r5, r7\n\t"
  3264. #endif
  3265. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3266. "lsrs %[hi], r3, #16\n\t"
  3267. #else
  3268. "lsr %[hi], r3, #16\n\t"
  3269. #endif
  3270. #ifdef WOLFSSL_KEIL
  3271. "muls r6, %[hi], r6\n\t"
  3272. #elif defined(__clang__)
  3273. "muls r6, %[hi]\n\t"
  3274. #else
  3275. "mul r6, %[hi]\n\t"
  3276. #endif
  3277. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3278. "adds r5, r5, r6\n\t"
  3279. #else
  3280. "add r5, r5, r6\n\t"
  3281. #endif
  3282. "uxth r6, %[d]\n\t"
  3283. #ifdef WOLFSSL_KEIL
  3284. "muls %[hi], r6, %[hi]\n\t"
  3285. #elif defined(__clang__)
  3286. "muls %[hi], r6\n\t"
  3287. #else
  3288. "mul %[hi], r6\n\t"
  3289. #endif
  3290. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3291. "lsrs r6, %[hi], #16\n\t"
  3292. #else
  3293. "lsr r6, %[hi], #16\n\t"
  3294. #endif
  3295. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3296. "lsls %[hi], %[hi], #16\n\t"
  3297. #else
  3298. "lsl %[hi], %[hi], #16\n\t"
  3299. #endif
  3300. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3301. "adds r4, r4, %[hi]\n\t"
  3302. #else
  3303. "add r4, r4, %[hi]\n\t"
  3304. #endif
  3305. #ifdef WOLFSSL_KEIL
  3306. "adcs r5, r5, r6\n\t"
  3307. #elif defined(__clang__)
  3308. "adcs r5, r6\n\t"
  3309. #else
  3310. "adc r5, r6\n\t"
  3311. #endif
  3312. /* r * d - Done */
  3313. "mov %[hi], r8\n\t"
  3314. "mov r6, r9\n\t"
  3315. #ifdef WOLFSSL_KEIL
  3316. "subs r4, %[hi], r4\n\t"
  3317. #else
  3318. #ifdef __clang__
  3319. "subs r4, %[hi], r4\n\t"
  3320. #else
  3321. "sub r4, %[hi], r4\n\t"
  3322. #endif
  3323. #endif
  3324. #ifdef WOLFSSL_KEIL
  3325. "sbcs r6, r6, r5\n\t"
  3326. #elif defined(__clang__)
  3327. "sbcs r6, r5\n\t"
  3328. #else
  3329. "sbc r6, r5\n\t"
  3330. #endif
  3331. "movs r5, r6\n\t"
  3332. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3333. "adds r3, r3, r5\n\t"
  3334. #else
  3335. "add r3, r3, r5\n\t"
  3336. #endif
  3337. "movs r6, %[d]\n\t"
  3338. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3339. "subs r6, r6, r4\n\t"
  3340. #else
  3341. "sub r6, r6, r4\n\t"
  3342. #endif
  3343. #ifdef WOLFSSL_KEIL
  3344. "sbcs r6, r6, r6\n\t"
  3345. #elif defined(__clang__)
  3346. "sbcs r6, r6\n\t"
  3347. #else
  3348. "sbc r6, r6\n\t"
  3349. #endif
  3350. #if defined(__clang__) || defined(WOLFSSL_KEIL)
  3351. "subs r3, r3, r6\n\t"
  3352. #else
  3353. "sub r3, r3, r6\n\t"
  3354. #endif
  3355. "movs %[hi], r3\n\t"
  3356. : [hi] "+l" (hi), [lo] "+l" (lo), [d] "+l" (d)
  3357. :
  3358. : "r3", "r4", "r5", "r6", "r7", "r8", "r9", "cc"
  3359. );
  3360. return (uint32_t)(size_t)hi;
  3361. }
  3362. #define SP_ASM_DIV_WORD
  3363. #endif /* !WOLFSSL_SP_DIV_WORD_HALF */
  3364. #define SP_INT_ASM_AVAILABLE
  3365. #endif /* WOLFSSL_SP_ARM_THUMB && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_PPC64) && SP_WORD_SIZE == 64
/*
 * CPU: PPC64
 *
 * GCC inline-assembly primitives for multi-precision arithmetic on
 * 64-bit words.  GPRs 16 and 17 are used as scratch registers and are
 * listed as clobbers by the macros that use them; "cc" is clobbered
 * wherever the carry chain (addc/adde/addze, subfc/subfe) is used.
 */
/* Multiply va by vb and store double size result in: vh | vl
 * mulld yields the low 64 bits of the product, mulhdu the high 64 bits. */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "mulld %[l], %[a], %[b] \n\t"                    \
        "mulhdu %[h], %[a], %[b] \n\t"                   \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "memory"                                       \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * The overflow word vo is set to zero. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mulhdu %[h], %[a], %[b] \n\t"                   \
        "mulld %[l], %[a], %[b] \n\t"                    \
        "li %[o], 0 \n\t"                                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        :                                                \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 * addc sets the carry, adde consumes it into vh, addze folds any
 * remaining carry into the overflow word vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mulld 16, %[a], %[b] \n\t"                      \
        "mulhdu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Multiply va by vb and add double size result into: vh | vl
 * Two-word variant: any carry out of vh is discarded by design. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "mulld 16, %[a], %[b] \n\t"                      \
        "mulhdu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * The product is computed once and accumulated twice, each pass with a
 * full three-word carry propagation. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "mulld 16, %[a], %[b] \n\t"                      \
        "mulhdu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so the first pass omits the addze into vo).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "mulld 16, %[a], %[b] \n\t"                      \
        "mulhdu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "mulld %[l], %[a], %[a] \n\t"                    \
        "mulhdu %[h], %[a], %[a] \n\t"                   \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va)                                   \
        : "memory"                                       \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "mulld 16, %[a], %[a] \n\t"                      \
        "mulhdu 17, %[a], %[a] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va)                                   \
        : "16", "17", "cc"                               \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "mulld 16, %[a], %[a] \n\t"                      \
        "mulhdu 17, %[a], %[a] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "16", "17", "cc"                               \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "addc %[l], %[l], %[a] \n\t"                     \
        "addze %[h], %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* Sub va from: vh | vl
 * subfc computes vl - va and records the borrow in CA; subfe then
 * subtracts zero (reg 16) with borrow from vh. */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subfc %[l], %[a], %[l] \n\t"                    \
        "li 16, 0 \n\t"                                  \
        "subfe %[h], 16, %[h] \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "16", "cc"                                     \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "addc %[l], %[l], %[a] \n\t"                     \
        "adde %[h], %[h], %[b] \n\t"                     \
        "adde %[o], %[o], %[c] \n\t"                     \
        "addc %[l], %[l], %[a] \n\t"                     \
        "adde %[h], %[h], %[b] \n\t"                     \
        "adde %[o], %[o], %[c] \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "cc"                                           \
    )
/* Count leading zeros. */
#define SP_ASM_LZCNT(va, vn)                             \
    __asm__ __volatile__ (                               \
        "cntlzd %[n], %[a] \n\t"                         \
        : [n] "=r" (vn)                                  \
        : [a] "r" (va)                                   \
        :                                                \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_PPC) && SP_WORD_SIZE == 32
/*
 * CPU: PPC 32-bit
 *
 * Same primitive set as the PPC64 section above, using the 32-bit
 * multiply instructions (mullw/mulhwu).  GPRs 16 and 17 are scratch and
 * are listed as clobbers where used; "cc" is clobbered by the carry
 * chain (addc/adde/addze, subfc/subfe).
 */
/* Multiply va by vb and store double size result in: vh | vl
 * mullw yields the low 32 bits of the product, mulhwu the high 32 bits. */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "mullw %[l], %[a], %[b] \n\t"                    \
        "mulhwu %[h], %[a], %[b] \n\t"                   \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "memory"                                       \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * The overflow word vo is set to zero. */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mulhwu %[h], %[a], %[b] \n\t"                   \
        "mullw %[l], %[a], %[b] \n\t"                    \
        "li %[o], 0 \n\t"                                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 * addc sets the carry, adde consumes it into vh, addze folds any
 * remaining carry into the overflow word vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "mullw 16, %[a], %[b] \n\t"                      \
        "mulhwu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Multiply va by vb and add double size result into: vh | vl
 * Two-word variant: any carry out of vh is discarded by design. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "mullw 16, %[a], %[b] \n\t"                      \
        "mulhwu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * The product is computed once and accumulated twice, each pass with a
 * full three-word carry propagation. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "mullw 16, %[a], %[b] \n\t"                      \
        "mulhwu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so the first pass omits the addze into vo).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "mullw 16, %[a], %[b] \n\t"                      \
        "mulhwu 17, %[a], %[b] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "16", "17", "cc"                               \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "mullw %[l], %[a], %[a] \n\t"                    \
        "mulhwu %[h], %[a], %[a] \n\t"                   \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va)                                   \
        : "memory"                                       \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "mullw 16, %[a], %[a] \n\t"                      \
        "mulhwu 17, %[a], %[a] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        "addze %[o], %[o] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va)                                   \
        : "16", "17", "cc"                               \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "mullw 16, %[a], %[a] \n\t"                      \
        "mulhwu 17, %[a], %[a] \n\t"                     \
        "addc %[l], %[l], 16 \n\t"                       \
        "adde %[h], %[h], 17 \n\t"                       \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "16", "17", "cc"                               \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "addc %[l], %[l], %[a] \n\t"                     \
        "addze %[h], %[h] \n\t"                          \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "cc"                                           \
    )
/* Sub va from: vh | vl
 * subfc computes vl - va and records the borrow in CA; subfe then
 * subtracts zero (reg 16) with borrow from vh. */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "subfc %[l], %[a], %[l] \n\t"                    \
        "li 16, 0 \n\t"                                  \
        "subfe %[h], 16, %[h] \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "16", "cc"                                     \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "addc %[l], %[l], %[a] \n\t"                     \
        "adde %[h], %[h], %[b] \n\t"                     \
        "adde %[o], %[o], %[c] \n\t"                     \
        "addc %[l], %[l], %[a] \n\t"                     \
        "adde %[h], %[h], %[b] \n\t"                     \
        "adde %[o], %[o], %[c] \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "cc"                                           \
    )
/* Count leading zeros. */
#define SP_ASM_LZCNT(va, vn)                             \
    __asm__ __volatile__ (                               \
        "cntlzw %[n], %[a] \n\t"                         \
        : [n] "=r" (vn)                                  \
        : [a] "r" (va)                                   \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_PPC && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_MIPS64) && SP_WORD_SIZE == 64
/*
 * CPU: MIPS 64-bit
 *
 * MIPS has no carry flag, so each carry is computed explicitly with
 * "sltu" (set-on-less-than-unsigned): after "daddu d, d, s" the result
 * wrapped iff d < s, which sltu turns into a 0/1 value that is then
 * added into the next word.  Registers $10-$12 are scratch; the
 * multiply result registers $lo/$hi are clobbered by dmultu.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[b] \n\t"                         \
        "mflo %[l] \n\t"                                 \
        "mfhi %[h] \n\t"                                 \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "memory", "$lo", "$hi"                         \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * The overflow word vo is set to zero (register $0 is hard-wired 0). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[b] \n\t"                         \
        "mflo %[l] \n\t"                                 \
        "mfhi %[h] \n\t"                                 \
        "move %[o], $0 \n\t"                             \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$lo", "$hi"                                   \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Product low/high go to $10/$11; each daddu is followed by an sltu
 * carry that is propagated up to the overflow word vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[b] \n\t"                         \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $12 \n\t"                     \
        "sltu $12, %[h], $12 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[h], %[h], $11 \n\t"                     \
        "sltu $12, %[h], $11 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "$lo", "$hi"              \
    )
/* Multiply va by vb and add double size result into: vh | vl
 * Two-word variant: any carry out of vh is discarded by design. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[b] \n\t"                         \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $11 \n\t"                     \
        "daddu %[h], %[h], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "$lo", "$hi"              \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * The product is computed once and accumulated twice, each pass with a
 * full three-word carry propagation. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[b] \n\t"                         \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $12 \n\t"                     \
        "sltu $12, %[h], $12 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[h], %[h], $11 \n\t"                     \
        "sltu $12, %[h], $11 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $12 \n\t"                     \
        "sltu $12, %[h], $12 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[h], %[h], $11 \n\t"                     \
        "sltu $12, %[h], $11 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "$lo", "$hi"              \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so the first pass does not propagate a carry into vo).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[b] \n\t"                         \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $11 \n\t"                     \
        "daddu %[h], %[h], $12 \n\t"                     \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $12 \n\t"                     \
        "sltu $12, %[h], $12 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[h], %[h], $11 \n\t"                     \
        "sltu $12, %[h], $11 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "$lo", "$hi"              \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[a] \n\t"                         \
        "mflo %[l] \n\t"                                 \
        "mfhi %[h] \n\t"                                 \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va)                                   \
        : "memory", "$lo", "$hi"                         \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[a] \n\t"                         \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $12 \n\t"                     \
        "sltu $12, %[h], $12 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[h], %[h], $11 \n\t"                     \
        "sltu $12, %[h], $11 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va)                                   \
        : "$10", "$11", "$12", "$lo", "$hi"              \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "dmultu %[a], %[a] \n\t"                         \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "daddu %[l], %[l], $10 \n\t"                     \
        "sltu $12, %[l], $10 \n\t"                       \
        "daddu %[h], %[h], $11 \n\t"                     \
        "daddu %[h], %[h], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "$10", "$11", "$12", "$lo", "$hi"              \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "daddu %[l], %[l], %[a] \n\t"                    \
        "sltu $12, %[l], %[a] \n\t"                      \
        "daddu %[h], %[h], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "$12"                                          \
    )
/* Sub va from: vh | vl
 * The borrow is detected by comparing the original vl (saved in $12)
 * with the subtraction result. */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "move $12, %[l] \n\t"                            \
        "dsubu %[l], $12, %[a] \n\t"                     \
        "sltu $12, $12, %[l] \n\t"                       \
        "dsubu %[h], %[h], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "$12"                                          \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "daddu %[l], %[l], %[a] \n\t"                    \
        "sltu $12, %[l], %[a] \n\t"                      \
        "daddu %[h], %[h], $12 \n\t"                     \
        "sltu $12, %[h], $12 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[h], %[h], %[b] \n\t"                    \
        "sltu $12, %[h], %[b] \n\t"                      \
        "daddu %[o], %[o], %[c] \n\t"                    \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[l], %[l], %[a] \n\t"                    \
        "sltu $12, %[l], %[a] \n\t"                      \
        "daddu %[h], %[h], $12 \n\t"                     \
        "sltu $12, %[h], $12 \n\t"                       \
        "daddu %[o], %[o], $12 \n\t"                     \
        "daddu %[h], %[h], %[b] \n\t"                    \
        "sltu $12, %[h], %[b] \n\t"                      \
        "daddu %[o], %[o], %[c] \n\t"                    \
        "daddu %[o], %[o], $12 \n\t"                     \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "$12"                                          \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_MIPS64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_MIPS) && SP_WORD_SIZE == 32
/*
 * CPU: MIPS 32-bit
 *
 * Same structure as the MIPS 64-bit section above with 32-bit
 * instructions (multu/addu/subu).  MIPS has no carry flag, so each
 * carry is computed explicitly with "sltu" after the addition.
 * Registers $10-$12 are scratch; %lo/%hi are clobbered by multu.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb)                       \
    __asm__ __volatile__ (                               \
        "multu %[a], %[b] \n\t"                          \
        "mflo %[l] \n\t"                                 \
        "mfhi %[h] \n\t"                                 \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "memory", "%lo", "%hi"                         \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl
 * The overflow word vo is set to zero (register $0 is hard-wired 0). */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "multu %[a], %[b] \n\t"                          \
        "mflo %[l] \n\t"                                 \
        "mfhi %[h] \n\t"                                 \
        "move %[o], $0 \n\t"                             \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "%lo", "%hi"                                   \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl
 * Product low/high go to $10/$11; each addu is followed by an sltu
 * carry that is propagated up to the overflow word vo. */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)               \
    __asm__ __volatile__ (                               \
        "multu %[a], %[b] \n\t"                          \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $12 \n\t"                      \
        "sltu $12, %[h], $12 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[h], %[h], $11 \n\t"                      \
        "sltu $12, %[h], $11 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "%lo", "%hi"              \
    )
/* Multiply va by vb and add double size result into: vh | vl
 * Two-word variant: any carry out of vh is discarded by design. */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)                \
    __asm__ __volatile__ (                               \
        "multu %[a], %[b] \n\t"                          \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $11 \n\t"                      \
        "addu %[h], %[h], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "%lo", "%hi"              \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * The product is computed once and accumulated twice, each pass with a
 * full three-word carry propagation. */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)              \
    __asm__ __volatile__ (                               \
        "multu %[a], %[b] \n\t"                          \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $12 \n\t"                      \
        "sltu $12, %[h], $12 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[h], %[h], $11 \n\t"                      \
        "sltu $12, %[h], $11 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $12 \n\t"                      \
        "sltu $12, %[h], $12 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[h], %[h], $11 \n\t"                      \
        "sltu $12, %[h], $11 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "%lo", "%hi"              \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl
 * (so the first pass does not propagate a carry into vo).
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                               \
        "multu %[a], %[b] \n\t"                          \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $11 \n\t"                      \
        "addu %[h], %[h], $12 \n\t"                      \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $12 \n\t"                      \
        "sltu $12, %[h], $12 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[h], %[h], $11 \n\t"                      \
        "sltu $12, %[h], $11 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb)                     \
        : "$10", "$11", "$12", "%lo", "%hi"              \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                           \
    __asm__ __volatile__ (                               \
        "multu %[a], %[a] \n\t"                          \
        "mflo %[l] \n\t"                                 \
        "mfhi %[h] \n\t"                                 \
        : [h] "+r" (vh), [l] "+r" (vl)                   \
        : [a] "r" (va)                                   \
        : "memory", "%lo", "%hi"                         \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                   \
    __asm__ __volatile__ (                               \
        "multu %[a], %[a] \n\t"                          \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $12 \n\t"                      \
        "sltu $12, %[h], $12 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[h], %[h], $11 \n\t"                      \
        "sltu $12, %[h], $11 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va)                                   \
        : "$10", "$11", "$12", "%lo", "%hi"              \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                    \
    __asm__ __volatile__ (                               \
        "multu %[a], %[a] \n\t"                          \
        "mflo $10 \n\t"                                  \
        "mfhi $11 \n\t"                                  \
        "addu %[l], %[l], $10 \n\t"                      \
        "sltu $12, %[l], $10 \n\t"                       \
        "addu %[h], %[h], $11 \n\t"                      \
        "addu %[h], %[h], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "$10", "$11", "$12", "%lo", "%hi"              \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "addu %[l], %[l], %[a] \n\t"                     \
        "sltu $12, %[l], %[a] \n\t"                      \
        "addu %[h], %[h], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "$12"                                          \
    )
/* Sub va from: vh | vl
 * The borrow is detected by comparing the original vl (saved in $12)
 * with the subtraction result. */
#define SP_ASM_SUBB(vl, vh, va)                          \
    __asm__ __volatile__ (                               \
        "move $12, %[l] \n\t"                            \
        "subu %[l], $12, %[a] \n\t"                      \
        "sltu $12, $12, %[l] \n\t"                       \
        "subu %[h], %[h], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh)                   \
        : [a] "r" (va)                                   \
        : "$12"                                          \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)         \
    __asm__ __volatile__ (                               \
        "addu %[l], %[l], %[a] \n\t"                     \
        "sltu $12, %[l], %[a] \n\t"                      \
        "addu %[h], %[h], $12 \n\t"                      \
        "sltu $12, %[h], $12 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[h], %[h], %[b] \n\t"                     \
        "sltu $12, %[h], %[b] \n\t"                      \
        "addu %[o], %[o], %[c] \n\t"                     \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[l], %[l], %[a] \n\t"                     \
        "sltu $12, %[l], %[a] \n\t"                      \
        "addu %[h], %[h], $12 \n\t"                      \
        "sltu $12, %[h], $12 \n\t"                       \
        "addu %[o], %[o], $12 \n\t"                      \
        "addu %[h], %[h], %[b] \n\t"                     \
        "sltu $12, %[h], %[b] \n\t"                      \
        "addu %[o], %[o], %[c] \n\t"                     \
        "addu %[o], %[o], $12 \n\t"                      \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo)    \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)       \
        : "$12"                                          \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_MIPS && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_RISCV64) && SP_WORD_SIZE == 64
/*
 * CPU: RISCV 64-bit
 *
 * RISC-V has no carry flag: temporaries a5/a6 hold the double-size
 * product (low/high halves from mul/mulhu) and a7 holds each carry,
 * computed with sltu (result < addend implies unsigned wrap).
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb)                    \
    __asm__ __volatile__ (                            \
        "mul    %[l], %[a], %[b] \n\t"                \
        "mulhu  %[h], %[a], %[b] \n\t"                \
        : [h] "+r" (vh), [l] "+r" (vl)                \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "memory"                                    \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * Overflow word vo is zeroed (add with the hard-wired zero register).
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                            \
        "mulhu  %[h], %[a], %[b] \n\t"                \
        "mul    %[l], %[a], %[b] \n\t"                \
        "add    %[o], zero, zero \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        :                                             \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Multiply va by vb and add double size result into: vh | vl.
 * Carry out of vh is discarded - caller guarantees it cannot occur.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)             \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * Product is computed once; the triple-word accumulate runs twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl, so the first pass skips
 * propagating a carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)        \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                        \
    __asm__ __volatile__ (                            \
        "mul    %[l], %[a], %[a] \n\t"                \
        "mulhu  %[h], %[a], %[a] \n\t"                \
        : [h] "+r" (vh), [l] "+r" (vl)                \
        : [a] "r" (va)                                \
        : "memory"                                    \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[a]   \n\t"                \
        "mulhu  a6, %[a], %[a]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va)                                \
        : "a5", "a6", "a7"                            \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                 \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[a]   \n\t"                \
        "mulhu  a6, %[a], %[a]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "a5", "a6", "a7"                            \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                       \
    __asm__ __volatile__ (                            \
        "add    %[l], %[l], %[a] \n\t"                \
        "sltu   a7, %[l], %[a]   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "a7"                                        \
    )
/* Sub va from: vh | vl.
 * Old low word saved in a7 so the borrow can be detected afterwards:
 * old < new means the subtraction wrapped.
 */
#define SP_ASM_SUBB(vl, vh, va)                       \
    __asm__ __volatile__ (                            \
        "add    a7, %[l], zero   \n\t"                \
        "sub    %[l], a7, %[a]   \n\t"                \
        "sltu   a7, a7, %[l]     \n\t"                \
        "sub    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "a7"                                        \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)      \
    __asm__ __volatile__ (                            \
        "add    %[l], %[l], %[a] \n\t"                \
        "sltu   a7, %[l], %[a]   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], %[b] \n\t"                \
        "sltu   a7, %[h], %[b]   \n\t"                \
        "add    %[o], %[o], %[c] \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[l], %[l], %[a] \n\t"                \
        "sltu   a7, %[l], %[a]   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], %[b] \n\t"                \
        "sltu   a7, %[h], %[b]   \n\t"                \
        "add    %[o], %[o], %[c] \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)    \
        : "a7"                                        \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_RISCV64 && SP_WORD_SIZE == 64 */
#if defined(WOLFSSL_SP_RISCV32) && SP_WORD_SIZE == 32
/*
 * CPU: RISCV 32-bit
 *
 * Same instruction sequences as the 64-bit RISC-V section but on 32-bit
 * registers: a5/a6 hold the double-size product (mul/mulhu) and a7 holds
 * each carry, computed with sltu since RISC-V has no carry flag.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb)                    \
    __asm__ __volatile__ (                            \
        "mul    %[l], %[a], %[b] \n\t"                \
        "mulhu  %[h], %[a], %[b] \n\t"                \
        : [h] "+r" (vh), [l] "+r" (vl)                \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "memory"                                    \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * Overflow word vo is zeroed (add with the hard-wired zero register).
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                            \
        "mulhu  %[h], %[a], %[b] \n\t"                \
        "mul    %[l], %[a], %[b] \n\t"                \
        "add    %[o], zero, zero \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        :                                             \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Multiply va by vb and add double size result into: vh | vl.
 * Carry out of vh is discarded - caller guarantees it cannot occur.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)             \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * Product is computed once; the triple-word accumulate runs twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl, so the first pass skips
 * propagating a carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)        \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[b]   \n\t"                \
        "mulhu  a6, %[a], %[b]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "a5", "a6", "a7"                            \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                        \
    __asm__ __volatile__ (                            \
        "mul    %[l], %[a], %[a] \n\t"                \
        "mulhu  %[h], %[a], %[a] \n\t"                \
        : [h] "+r" (vh), [l] "+r" (vl)                \
        : [a] "r" (va)                                \
        : "memory"                                    \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[a]   \n\t"                \
        "mulhu  a6, %[a], %[a]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "sltu   a7, %[h], a6     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va)                                \
        : "a5", "a6", "a7"                            \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                 \
    __asm__ __volatile__ (                            \
        "mul    a5, %[a], %[a]   \n\t"                \
        "mulhu  a6, %[a], %[a]   \n\t"                \
        "add    %[l], %[l], a5   \n\t"                \
        "sltu   a7, %[l], a5     \n\t"                \
        "add    %[h], %[h], a6   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "a5", "a6", "a7"                            \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                       \
    __asm__ __volatile__ (                            \
        "add    %[l], %[l], %[a] \n\t"                \
        "sltu   a7, %[l], %[a]   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "a7"                                        \
    )
/* Sub va from: vh | vl.
 * Old low word saved in a7 so the borrow can be detected afterwards:
 * old < new means the subtraction wrapped.
 */
#define SP_ASM_SUBB(vl, vh, va)                       \
    __asm__ __volatile__ (                            \
        "add    a7, %[l], zero   \n\t"                \
        "sub    %[l], a7, %[a]   \n\t"                \
        "sltu   a7, a7, %[l]     \n\t"                \
        "sub    %[h], %[h], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "a7"                                        \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)      \
    __asm__ __volatile__ (                            \
        "add    %[l], %[l], %[a] \n\t"                \
        "sltu   a7, %[l], %[a]   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], %[b] \n\t"                \
        "sltu   a7, %[h], %[b]   \n\t"                \
        "add    %[o], %[o], %[c] \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[l], %[l], %[a] \n\t"                \
        "sltu   a7, %[l], %[a]   \n\t"                \
        "add    %[h], %[h], a7   \n\t"                \
        "sltu   a7, %[h], a7     \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        "add    %[h], %[h], %[b] \n\t"                \
        "sltu   a7, %[h], %[b]   \n\t"                \
        "add    %[o], %[o], %[c] \n\t"                \
        "add    %[o], %[o], a7   \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)    \
        : "a7"                                        \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_RISCV32 && SP_WORD_SIZE == 32 */
#if defined(WOLFSSL_SP_S390X) && SP_WORD_SIZE == 64
/*
 * CPU: IBM s390x (z/Architecture)
 *
 * mlgr multiplies the implicit even/odd register pair: with r1 holding
 * the multiplicand, "mlgr %r0, x" leaves the 128-bit product in r0
 * (high) : r1 (low). algr/alcgr form an add-with-carry chain through
 * the condition code; slgr/slbgr do the same for subtract-with-borrow.
 */
/* Multiply va by vb and store double size result in: vh | vl */
#define SP_ASM_MUL(vl, vh, va, vb)                    \
    __asm__ __volatile__ (                            \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %[b]       \n\t"                \
        "lgr    %[l], %%r1       \n\t"                \
        "lgr    %[h], %%r0       \n\t"                \
        : [h] "+r" (vh), [l] "+r" (vl)                \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "memory", "r0", "r1"                        \
    )
/* Multiply va by vb and store double size result in: vo | vh | vl.
 * Overflow word vo is zeroed with lghi.
 */
#define SP_ASM_MUL_SET(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                            \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %[b]       \n\t"                \
        "lghi   %[o], 0          \n\t"                \
        "lgr    %[l], %%r1       \n\t"                \
        "lgr    %[h], %%r0       \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "=r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "r0", "r1"                                  \
    )
/* Multiply va by vb and add double size result into: vo | vh | vl.
 * r10 is a zero source so alcgr can propagate just the carry into vo.
 */
#define SP_ASM_MUL_ADD(vl, vh, vo, va, vb)            \
    __asm__ __volatile__ (                            \
        "lghi   %%r10, 0         \n\t"                \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %[b]       \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        "alcgr  %[o], %%r10      \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "r0", "r1", "r10", "cc"                     \
    )
/* Multiply va by vb and add double size result into: vh | vl.
 * Carry out of vh is discarded - caller guarantees it cannot occur.
 */
#define SP_ASM_MUL_ADD_NO(vl, vh, va, vb)             \
    __asm__ __volatile__ (                            \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %[b]       \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "r0", "r1", "cc"                            \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl.
 * Product is computed once; the carry-chained accumulate runs twice.
 */
#define SP_ASM_MUL_ADD2(vl, vh, vo, va, vb)           \
    __asm__ __volatile__ (                            \
        "lghi   %%r10, 0         \n\t"                \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %[b]       \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        "alcgr  %[o], %%r10      \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        "alcgr  %[o], %%r10      \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "r0", "r1", "r10", "cc"                     \
    )
/* Multiply va by vb and add double size result twice into: vo | vh | vl
 * Assumes first add will not overflow vh | vl, so only the second pass
 * propagates a carry into vo.
 */
#define SP_ASM_MUL_ADD2_NO(vl, vh, vo, va, vb)        \
    __asm__ __volatile__ (                            \
        "lghi   %%r10, 0         \n\t"                \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %[b]       \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        "alcgr  %[o], %%r10      \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb)                  \
        : "r0", "r1", "r10", "cc"                     \
    )
/* Square va and store double size result in: vh | vl */
#define SP_ASM_SQR(vl, vh, va)                        \
    __asm__ __volatile__ (                            \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %%r1       \n\t"                \
        "lgr    %[l], %%r1       \n\t"                \
        "lgr    %[h], %%r0       \n\t"                \
        : [h] "+r" (vh), [l] "+r" (vl)                \
        : [a] "r" (va)                                \
        : "memory", "r0", "r1"                        \
    )
/* Square va and add double size result into: vo | vh | vl */
#define SP_ASM_SQR_ADD(vl, vh, vo, va)                \
    __asm__ __volatile__ (                            \
        "lghi   %%r10, 0         \n\t"                \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %%r1       \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        "alcgr  %[o], %%r10      \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va)                                \
        : "r0", "r1", "r10", "cc"                     \
    )
/* Square va and add double size result into: vh | vl */
#define SP_ASM_SQR_ADD_NO(vl, vh, va)                 \
    __asm__ __volatile__ (                            \
        "lgr    %%r1, %[a]       \n\t"                \
        "mlgr   %%r0, %%r1       \n\t"                \
        "algr   %[l], %%r1       \n\t"                \
        "alcgr  %[h], %%r0       \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "r0", "r1", "cc"                            \
    )
/* Add va into: vh | vl */
#define SP_ASM_ADDC(vl, vh, va)                       \
    __asm__ __volatile__ (                            \
        "lghi   %%r10, 0         \n\t"                \
        "algr   %[l], %[a]       \n\t"                \
        "alcgr  %[h], %%r10      \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "r10", "cc"                                 \
    )
/* Sub va from: vh | vl */
#define SP_ASM_SUBB(vl, vh, va)                       \
    __asm__ __volatile__ (                            \
        "lghi   %%r10, 0         \n\t"                \
        "slgr   %[l], %[a]       \n\t"                \
        "slbgr  %[h], %%r10      \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh)                \
        : [a] "r" (va)                                \
        : "r10", "cc"                                 \
    )
/* Add two times vc | vb | va into vo | vh | vl */
#define SP_ASM_ADD_DBL_3(vl, vh, vo, va, vb, vc)      \
    __asm__ __volatile__ (                            \
        "algr   %[l], %[a]       \n\t"                \
        "alcgr  %[h], %[b]       \n\t"                \
        "alcgr  %[o], %[c]       \n\t"                \
        "algr   %[l], %[a]       \n\t"                \
        "alcgr  %[h], %[b]       \n\t"                \
        "alcgr  %[o], %[c]       \n\t"                \
        : [l] "+r" (vl), [h] "+r" (vh), [o] "+r" (vo) \
        : [a] "r" (va), [b] "r" (vb), [c] "r" (vc)    \
        : "cc"                                        \
    )
#define SP_INT_ASM_AVAILABLE
#endif /* WOLFSSL_SP_S390X && SP_WORD_SIZE == 64 */
  4590. #ifdef SP_INT_ASM_AVAILABLE
  4591. #ifndef SP_INT_NO_ASM
  4592. #define SQR_MUL_ASM
  4593. #endif
  4594. #ifndef SP_ASM_ADDC_REG
  4595. #define SP_ASM_ADDC_REG SP_ASM_ADDC
  4596. #endif /* SP_ASM_ADDC_REG */
  4597. #ifndef SP_ASM_SUBB_REG
  4598. #define SP_ASM_SUBB_REG SP_ASM_SUBB
#endif /* SP_ASM_SUBB_REG */
#endif /* SP_INT_ASM_AVAILABLE */
  4601. #endif /* !WOLFSSL_NO_ASM */
  4602. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || \
  4603. !defined(NO_DSA) || !defined(NO_DH) || \
  4604. (defined(HAVE_ECC) && defined(HAVE_COMP_KEY)) || defined(OPENSSL_EXTRA) || \
  4605. (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4606. #ifndef WC_NO_CACHE_RESISTANT
/* Mask of address for constant time operations.
 * Index 0 selects an all-zero mask, index 1 an all-one mask, so a value
 * can be selected with AND instead of a data-dependent branch.
 */
const size_t sp_off_on_addr[2] =
{
    (size_t) 0,
    (size_t)-1
};
  4613. #endif
  4614. #endif
  4615. #if defined(WOLFSSL_HAVE_SP_DH) || defined(WOLFSSL_HAVE_SP_RSA)
  4616. #ifdef __cplusplus
  4617. extern "C" {
  4618. #endif
  4619. /* Modular exponentiation implementations using Single Precision. */
  4620. WOLFSSL_LOCAL int sp_ModExp_1024(sp_int* base, sp_int* exp, sp_int* mod,
  4621. sp_int* res);
  4622. WOLFSSL_LOCAL int sp_ModExp_1536(sp_int* base, sp_int* exp, sp_int* mod,
  4623. sp_int* res);
  4624. WOLFSSL_LOCAL int sp_ModExp_2048(sp_int* base, sp_int* exp, sp_int* mod,
  4625. sp_int* res);
  4626. WOLFSSL_LOCAL int sp_ModExp_3072(sp_int* base, sp_int* exp, sp_int* mod,
  4627. sp_int* res);
  4628. WOLFSSL_LOCAL int sp_ModExp_4096(sp_int* base, sp_int* exp, sp_int* mod,
  4629. sp_int* res);
  4630. #ifdef __cplusplus
  4631. } /* extern "C" */
  4632. #endif
  4633. #endif /* WOLFSSL_HAVE_SP_DH || WOLFSSL_HAVE_SP_RSA */
  4634. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  4635. defined(OPENSSL_ALL)
  4636. static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp);
  4637. #endif
  4638. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  4639. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
  4640. defined(OPENSSL_ALL)
  4641. static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho);
  4642. #endif
  4643. /* Determine when mp_add_d is required. */
  4644. #if !defined(NO_PWDBASED) || defined(WOLFSSL_KEY_GEN) || !defined(NO_DH) || \
  4645. !defined(NO_DSA) || \
  4646. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4647. defined(OPENSSL_EXTRA)
  4648. #define WOLFSSL_SP_ADD_D
  4649. #endif
  4650. /* Determine when mp_sub_d is required. */
  4651. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4652. !defined(NO_DH) || defined(HAVE_ECC) || !defined(NO_DSA)
  4653. #define WOLFSSL_SP_SUB_D
  4654. #endif
/* Determine when mp_read_radix with a radix of 16 is required. */
  4656. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  4657. !defined(WOLFSSL_RSA_VERIFY_ONLY)) || defined(HAVE_ECC) || \
  4658. !defined(NO_DSA) || defined(OPENSSL_EXTRA)
  4659. #define WOLFSSL_SP_READ_RADIX_16
  4660. #endif
  4661. /* Determine when mp_read_radix with a radix of 10 is required. */
  4662. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  4663. !defined(WOLFSSL_RSA_VERIFY_ONLY)
  4664. #define WOLFSSL_SP_READ_RADIX_10
  4665. #endif
  4666. /* Determine when mp_invmod is required. */
  4667. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  4668. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4669. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  4670. #define WOLFSSL_SP_INVMOD
  4671. #endif
  4672. /* Determine when mp_invmod_mont_ct is required. */
  4673. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  4674. #define WOLFSSL_SP_INVMOD_MONT_CT
  4675. #endif
  4676. /* Determine when mp_prime_gen is required. */
  4677. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4678. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  4679. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  4680. #define WOLFSSL_SP_PRIME_GEN
  4681. #endif
  4682. /* Set the multi-precision number to zero.
  4683. *
  4684. * Assumes a is not NULL.
  4685. *
  4686. * @param [out] a SP integer to set to zero.
  4687. */
  4688. static void _sp_zero(sp_int* a)
  4689. {
  4690. sp_int_minimal* am = (sp_int_minimal *)a;
  4691. am->used = 0;
  4692. am->dp[0] = 0;
  4693. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4694. am->sign = MP_ZPOS;
  4695. #endif
  4696. }
/* Initialize the multi-precision number to be zero with a given max size.
 *
 * Assumes a is not NULL.
 *
 * @param [out] a     SP integer.
 * @param [in]  size  Number of words to say are available.
 */
static void _sp_init_size(sp_int* a, unsigned int size)
{
    /* NOTE(review): the volatile qualifier presumably keeps the compiler
     * from eliding/reordering these initialization stores - confirm
     * before changing. */
    volatile sp_int_minimal* am = (sp_int_minimal *)a;

#ifdef HAVE_WOLF_BIGINT
    /* Initialize the raw big-integer representation too. */
    wc_bigint_init((struct WC_BIGINT*)&am->raw);
#endif
    /* Zero value, used count (and sign when compiled in). */
    _sp_zero((sp_int*)am);
    /* Record how many digits the caller says are available. */
    am->size = size;
}
  4711. /* Initialize the multi-precision number to be zero with a given max size.
  4712. *
  4713. * @param [out] a SP integer.
  4714. * @param [in] size Number of words to say are available.
  4715. *
  4716. * @return MP_OKAY on success.
  4717. * @return MP_VAL when a is NULL.
  4718. */
  4719. int sp_init_size(sp_int* a, unsigned int size)
  4720. {
  4721. int err = MP_OKAY;
  4722. /* Validate parameters. Don't use size more than max compiled. */
  4723. if ((a == NULL) || ((size <= 0) || (size > SP_INT_DIGITS))) {
  4724. err = MP_VAL;
  4725. }
  4726. if (err == MP_OKAY) {
  4727. _sp_init_size(a, size);
  4728. }
  4729. return err;
  4730. }
  4731. /* Initialize the multi-precision number to be zero.
  4732. *
  4733. * @param [out] a SP integer.
  4734. *
  4735. * @return MP_OKAY on success.
  4736. * @return MP_VAL when a is NULL.
  4737. */
  4738. int sp_init(sp_int* a)
  4739. {
  4740. int err = MP_OKAY;
  4741. /* Validate parameter. */
  4742. if (a == NULL) {
  4743. err = MP_VAL;
  4744. }
  4745. else {
  4746. /* Assume complete sp_int with SP_INT_DIGITS digits. */
  4747. _sp_init_size(a, SP_INT_DIGITS);
  4748. }
  4749. return err;
  4750. }
  4751. #if !defined(WOLFSSL_RSA_PUBLIC_ONLY) || !defined(NO_DH) || defined(HAVE_ECC)
  4752. /* Initialize up to six multi-precision numbers to be zero.
  4753. *
  4754. * @param [out] n1 SP integer.
  4755. * @param [out] n2 SP integer.
  4756. * @param [out] n3 SP integer.
  4757. * @param [out] n4 SP integer.
  4758. * @param [out] n5 SP integer.
  4759. * @param [out] n6 SP integer.
  4760. *
  4761. * @return MP_OKAY on success.
  4762. */
  4763. int sp_init_multi(sp_int* n1, sp_int* n2, sp_int* n3, sp_int* n4, sp_int* n5,
  4764. sp_int* n6)
  4765. {
  4766. /* Initialize only those pointers that are valid. */
  4767. if (n1 != NULL) {
  4768. _sp_init_size(n1, SP_INT_DIGITS);
  4769. }
  4770. if (n2 != NULL) {
  4771. _sp_init_size(n2, SP_INT_DIGITS);
  4772. }
  4773. if (n3 != NULL) {
  4774. _sp_init_size(n3, SP_INT_DIGITS);
  4775. }
  4776. if (n4 != NULL) {
  4777. _sp_init_size(n4, SP_INT_DIGITS);
  4778. }
  4779. if (n5 != NULL) {
  4780. _sp_init_size(n5, SP_INT_DIGITS);
  4781. }
  4782. if (n6 != NULL) {
  4783. _sp_init_size(n6, SP_INT_DIGITS);
  4784. }
  4785. return MP_OKAY;
  4786. }
  4787. #endif /* !WOLFSSL_RSA_PUBLIC_ONLY || !NO_DH || HAVE_ECC */
  4788. /* Free the memory allocated in the multi-precision number.
  4789. *
  4790. * @param [in] a SP integer.
  4791. */
  4792. void sp_free(sp_int* a)
  4793. {
  4794. if (a != NULL) {
  4795. #ifdef HAVE_WOLF_BIGINT
  4796. wc_bigint_free(&a->raw);
  4797. #endif
  4798. }
  4799. }
  4800. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4801. !defined(NO_DH) || defined(HAVE_ECC)
  4802. /* Grow multi-precision number to be able to hold l digits.
  4803. * This function does nothing as the number of digits is fixed.
  4804. *
  4805. * @param [in,out] a SP integer.
  4806. * @param [in] l Number of digits to grow to.
  4807. *
  4808. * @return MP_OKAY on success
  4809. * @return MP_MEM if the number of digits requested is more than available.
  4810. */
  4811. int sp_grow(sp_int* a, int l)
  4812. {
  4813. int err = MP_OKAY;
  4814. /* Validate parameter. */
  4815. if ((a == NULL) || (l < 0)) {
  4816. err = MP_VAL;
  4817. }
  4818. /* Ensure enough words allocated for grow. */
  4819. if ((err == MP_OKAY) && ((unsigned int)l > a->size)) {
  4820. err = MP_MEM;
  4821. }
  4822. if (err == MP_OKAY) {
  4823. unsigned int i;
  4824. /* Put in zeros up to the new length. */
  4825. for (i = a->used; i < (unsigned int)l; i++) {
  4826. a->dp[i] = 0;
  4827. }
  4828. }
  4829. return err;
  4830. }
  4831. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH || HAVE_ECC */
  4832. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  4833. defined(HAVE_ECC)
  4834. /* Set the multi-precision number to zero.
  4835. *
  4836. * @param [out] a SP integer to set to zero.
  4837. */
  4838. void sp_zero(sp_int* a)
  4839. {
  4840. /* Make an sp_int with valid pointer zero. */
  4841. if (a != NULL) {
  4842. _sp_zero(a);
  4843. }
  4844. }
  4845. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || HAVE_ECC */
  4846. /* Clear the data from the multi-precision number, set to zero and free.
  4847. *
  4848. * @param [out] a SP integer.
  4849. */
  4850. void sp_clear(sp_int* a)
  4851. {
  4852. /* Clear when valid pointer passed in. */
  4853. if (a != NULL) {
  4854. unsigned int i;
  4855. /* Only clear the digits being used. */
  4856. for (i = 0; i < a->used; i++) {
  4857. a->dp[i] = 0;
  4858. }
  4859. /* Set back to zero and free. */
  4860. _sp_zero(a);
  4861. sp_free(a);
  4862. }
  4863. }
  4864. #if !defined(NO_RSA) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4865. !defined(NO_DSA) || defined(WOLFSSL_SP_PRIME_GEN)
/* Ensure the data in the multi-precision number is zeroed.
 *
 * Use when security sensitive data needs to be wiped.
 *
 * @param [in] a  SP integer.
 */
void sp_forcezero(sp_int* a)
{
    /* Zeroize when a valid pointer passed in. */
    if (a != NULL) {
        /* Ensure all data zeroized - data not zeroed when used decreases.
         * Wipe the full capacity (size words), not just the used words;
         * ForceZero is the wipe helper that must not be optimized away. */
        ForceZero(a->dp, a->size * SP_WORD_SIZEOF);
        /* Set back to zero. */
#ifdef HAVE_WOLF_BIGINT
        /* Zeroize the raw data as well. */
        wc_bigint_zero(&a->raw);
#endif
        /* Make value zero and free. */
        _sp_zero(a);
        sp_free(a);
    }
}
#endif /* !NO_RSA || !NO_DH || HAVE_ECC || !NO_DSA || WOLFSSL_SP_PRIME_GEN */
  4889. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  4890. !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY)
  4891. /* Copy value of multi-precision number a into r.
  4892. *
  4893. * @param [in] a SP integer - source.
  4894. * @param [out] r SP integer - destination.
  4895. */
  4896. static void _sp_copy(const sp_int* a, sp_int* r)
  4897. {
  4898. /* Copy words across. */
  4899. if (a->used == 0) {
  4900. r->dp[0] = 0;
  4901. }
  4902. else {
  4903. XMEMCPY(r->dp, a->dp, a->used * SP_WORD_SIZEOF);
  4904. }
  4905. /* Set number of used words in result. */
  4906. r->used = a->used;
  4907. #ifdef WOLFSSL_SP_INT_NEGATIVE
  4908. /* Set sign of result. */
  4909. r->sign = a->sign;
  4910. #endif
  4911. }
  4912. /* Copy value of multi-precision number a into r.
  4913. *
  4914. * @param [in] a SP integer - source.
  4915. * @param [out] r SP integer - destination.
  4916. *
  4917. * @return MP_OKAY on success.
  4918. */
  4919. int sp_copy(const sp_int* a, sp_int* r)
  4920. {
  4921. int err = MP_OKAY;
  4922. /* Validate parameters. */
  4923. if ((a == NULL) || (r == NULL)) {
  4924. err = MP_VAL;
  4925. }
  4926. /* Only copy if different pointers. */
  4927. if (a != r) {
  4928. /* Validated space in result. */
  4929. if ((err == MP_OKAY) && (a->used > r->size)) {
  4930. err = MP_VAL;
  4931. }
  4932. if (err == MP_OKAY) {
  4933. _sp_copy(a, r);
  4934. }
  4935. }
  4936. return err;
  4937. }
  4938. #endif
  4939. #if ((defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  4940. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
  4941. defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM)
/* Copy 2 numbers into two results based on y. Copy a fixed number of digits.
 *
 * Constant time implementation.
 * When y is 0, r1 = a2 and r2 = a1.
 * When y is 1, r1 = a1 and r2 = a2.
 *
 * @param [in]  a1    First number to copy.
 * @param [in]  a2    Second number to copy.
 * @param [out] r1    First result number to copy into.
 * @param [out] r2    Second result number to copy into.
 * @param [in]  y     Indicates which number goes into which result number.
 * @param [in]  used  Number of digits to copy.
 */
static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1,
    sp_int* r2, int y, unsigned int used)
{
    unsigned int i;

    /* Copy data - constant time. Each digit is masked by
     * wc_off_on_addr[y] / wc_off_on_addr[y^1] so that exactly one of the
     * two operands contributes to each result; both loads and both masks
     * are always performed regardless of y, avoiding a data-dependent
     * branch. (Presumably wc_off_on_addr[0] == 0 and wc_off_on_addr[1] is
     * an all-ones mask - defined elsewhere; verify against its table.) */
    for (i = 0; i < used; i++) {
        r1->dp[i] = (a1->dp[i] & ((sp_digit)wc_off_on_addr[y  ])) +
                    (a2->dp[i] & ((sp_digit)wc_off_on_addr[y^1]));
        r2->dp[i] = (a1->dp[i] & ((sp_digit)wc_off_on_addr[y^1])) +
                    (a2->dp[i] & ((sp_digit)wc_off_on_addr[y  ]));
    }
    /* Copy used - same masked-select scheme as the digit loop. */
    r1->used = (a1->used & ((int)wc_off_on_addr[y  ])) +
               (a2->used & ((int)wc_off_on_addr[y^1]));
    r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) +
               (a2->used & ((int)wc_off_on_addr[y  ]));
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Copy sign - same masked-select scheme as the digit loop. */
    r1->sign = (a1->sign & ((int)wc_off_on_addr[y  ])) +
               (a2->sign & ((int)wc_off_on_addr[y^1]));
    r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) +
               (a2->sign & ((int)wc_off_on_addr[y  ]));
#endif
}
  4979. #endif
  4980. #if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC))
  4981. /* Initializes r and copies in value from a.
  4982. *
  4983. * @param [out] r SP integer - destination.
  4984. * @param [in] a SP integer - source.
  4985. *
  4986. * @return MP_OKAY on success.
  4987. * @return MP_VAL when a or r is NULL.
  4988. */
  4989. int sp_init_copy(sp_int* r, const sp_int* a)
  4990. {
  4991. int err;
  4992. /* Initialize r and copy value in a into it. */
  4993. err = sp_init(r);
  4994. if (err == MP_OKAY) {
  4995. err = sp_copy(a, r);
  4996. }
  4997. return err;
  4998. }
  4999. #endif /* WOLFSSL_SP_MATH_ALL || (HAVE_ECC && FP_ECC) */
  5000. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5001. !defined(NO_DH) || !defined(NO_DSA)
/* Exchange the values in a and b.
 *
 * Avoid using this API as three copy operations are performed.
 *
 * @param [in,out] a  SP integer to swap.
 * @param [in,out] b  SP integer to swap.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or b is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
int sp_exch(sp_int* a, sp_int* b)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (b == NULL)) {
        err = MP_VAL;
    }
    /* Check space for a in b and b in a. */
    if ((err == MP_OKAY) && ((a->size < b->used) || (b->size < a->used))) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Declare temporary for swapping. Only a->used digits are needed
         * since only the struct prefix up to the used digits is copied. */
        DECL_SP_INT(t, a->used);

        /* Create temporary for swapping. */
        ALLOC_SP_INT(t, a->used, err, NULL);
        if (err == MP_OKAY) {
            /* Cache allocated size of a and b - the raw struct copies below
             * would otherwise overwrite each object's capacity field. */
            unsigned int asize = a->size;
            unsigned int bsize = b->size;
            /* Copy all of SP int: t <- a, a <- b, b <- t.
             * MP_INT_SIZEOF(n) covers the header plus n digits, so each
             * memcpy moves only the populated part of the structure. */
            XMEMCPY(t, a, MP_INT_SIZEOF(a->used));
            XMEMCPY(a, b, MP_INT_SIZEOF(b->used));
            XMEMCPY(b, t, MP_INT_SIZEOF(t->used));
            /* Put back size of a and b. */
            a->size = asize;
            b->size = bsize;
        }

        FREE_SP_INT(t, NULL);
    }

    return err;
}
  5045. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || !NO_DH ||
  5046. * !NO_DSA */
  5047. #if defined(HAVE_ECC) && defined(ECC_TIMING_RESISTANT) && \
  5048. !defined(WC_NO_CACHE_RESISTANT)
  5049. /* Conditional swap of SP int values in constant time.
  5050. *
  5051. * @param [in] a First SP int to conditionally swap.
  5052. * @param [in] b Second SP int to conditionally swap.
  5053. * @param [in] cnt Count of words to copy.
  5054. * @param [in] swap When value is 1 then swap.
  5055. * @return MP_OKAY on success.
  5056. * @return MP_MEM when dynamic memory allocation fails.
  5057. */
  5058. int sp_cond_swap_ct(sp_int* a, sp_int* b, int cnt, int swap)
  5059. {
  5060. unsigned int i;
  5061. int err = MP_OKAY;
  5062. sp_int_digit mask = (sp_int_digit)0 - (sp_int_digit)swap;
  5063. DECL_SP_INT(t, (size_t)cnt);
  5064. /* Allocate temporary to hold masked xor of a and b. */
  5065. ALLOC_SP_INT(t, cnt, err, NULL);
  5066. if (err == MP_OKAY) {
  5067. /* XOR other fields in sp_int into temp - mask set when swapping. */
  5068. t->used = (a->used ^ b->used) & (unsigned int)mask;
  5069. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5070. t->sign = (a->sign ^ b->sign) & (unsigned int)mask;
  5071. #endif
  5072. /* XOR requested words into temp - mask set when swapping. */
  5073. for (i = 0; i < (unsigned int)cnt; i++) {
  5074. t->dp[i] = (a->dp[i] ^ b->dp[i]) & mask;
  5075. }
  5076. /* XOR temporary - when mask set then result will be b. */
  5077. a->used ^= t->used;
  5078. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5079. a->sign ^= t->sign;
  5080. #endif
  5081. for (i = 0; i < (unsigned int)cnt; i++) {
  5082. a->dp[i] ^= t->dp[i];
  5083. }
  5084. /* XOR temporary - when mask set then result will be a. */
  5085. b->used ^= t->used;
  5086. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5087. b->sign ^= b->sign;
  5088. #endif
  5089. for (i = 0; i < (unsigned int)cnt; i++) {
  5090. b->dp[i] ^= t->dp[i];
  5091. }
  5092. }
  5093. FREE_SP_INT(t, NULL);
  5094. return err;
  5095. }
  5096. #endif /* HAVE_ECC && ECC_TIMING_RESISTANT && !WC_NO_CACHE_RESISTANT */
  5097. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5098. /* Calculate the absolute value of the multi-precision number.
  5099. *
  5100. * @param [in] a SP integer to calculate absolute value of.
  5101. * @param [out] r SP integer to hold result.
  5102. *
  5103. * @return MP_OKAY on success.
  5104. * @return MP_VAL when a or r is NULL.
  5105. */
  5106. int sp_abs(const sp_int* a, sp_int* r)
  5107. {
  5108. int err;
  5109. /* Copy a into r - copy fails when r is NULL. */
  5110. err = sp_copy(a, r);
  5111. if (err == MP_OKAY) {
  5112. r->sign = MP_ZPOS;
  5113. }
  5114. return err;
  5115. }
  5116. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  5117. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  5118. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  5119. /* Compare absolute value of two multi-precision numbers.
  5120. *
  5121. * @param [in] a SP integer.
  5122. * @param [in] b SP integer.
  5123. *
  5124. * @return MP_GT when a is greater than b.
  5125. * @return MP_LT when a is less than b.
  5126. * @return MP_EQ when a is equals b.
  5127. */
  5128. static int _sp_cmp_abs(const sp_int* a, const sp_int* b)
  5129. {
  5130. int ret = MP_EQ;
  5131. /* Check number of words first. */
  5132. if (a->used > b->used) {
  5133. ret = MP_GT;
  5134. }
  5135. else if (a->used < b->used) {
  5136. ret = MP_LT;
  5137. }
  5138. else {
  5139. int i;
  5140. /* Starting from most significant word, compare words.
  5141. * Stop when different and set comparison return.
  5142. */
  5143. for (i = (int)(a->used - 1); i >= 0; i--) {
  5144. if (a->dp[i] > b->dp[i]) {
  5145. ret = MP_GT;
  5146. break;
  5147. }
  5148. else if (a->dp[i] < b->dp[i]) {
  5149. ret = MP_LT;
  5150. break;
  5151. }
  5152. }
  5153. /* If we made to the end then ret is MP_EQ from initialization. */
  5154. }
  5155. return ret;
  5156. }
  5157. #endif
  5158. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
  5159. /* Compare absolute value of two multi-precision numbers.
  5160. *
  5161. * Pointers are compared such that NULL is less than not NULL.
  5162. *
  5163. * @param [in] a SP integer.
  5164. * @param [in] b SP integer.
  5165. *
  5166. * @return MP_GT when a is greater than b.
  5167. * @return MP_LT when a is less than b.
  5168. * @return MP_EQ when a equals b.
  5169. */
  5170. int sp_cmp_mag(const sp_int* a, const sp_int* b)
  5171. {
  5172. int ret;
  5173. /* Do pointer checks first. Both NULL returns equal. */
  5174. if (a == b) {
  5175. ret = MP_EQ;
  5176. }
  5177. /* Nothing is smaller than something. */
  5178. else if (a == NULL) {
  5179. ret = MP_LT;
  5180. }
  5181. /* Something is larger than nothing. */
  5182. else if (b == NULL) {
  5183. ret = MP_GT;
  5184. }
  5185. else
  5186. {
  5187. /* Compare values - a and b are not NULL. */
  5188. ret = _sp_cmp_abs(a, b);
  5189. }
  5190. return ret;
  5191. }
  5192. #endif
  5193. #if defined(WOLFSSL_SP_MATH_ALL) || defined(HAVE_ECC) || !defined(NO_DSA) || \
  5194. defined(OPENSSL_EXTRA) || !defined(NO_DH) || \
  5195. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
/* Compare two multi-precision numbers.
 *
 * Assumes a and b are not NULL.
 *
 * @param [in] a  SP integer.
 * @param [in] b  SP integer.
 *
 * @return MP_GT when a is greater than b.
 * @return MP_LT when a is less than b.
 * @return MP_EQ when a equals b.
 */
static int _sp_cmp(const sp_int* a, const sp_int* b)
{
    int ret;

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check sign first - a negative number is always less than a positive
     * one (MP_NEG > MP_ZPOS as integers, hence the inverted comparisons). */
    if (a->sign > b->sign) {
        ret = MP_LT;
    }
    else if (a->sign < b->sign) {
        ret = MP_GT;
    }
    else /* (a->sign == b->sign) */ {
#endif
        /* Compare values. */
        ret = _sp_cmp_abs(a, b);
#ifdef WOLFSSL_SP_INT_NEGATIVE
        if (a->sign == MP_NEG) {
            /* MP_GT = 1, MP_LT = -1, MP_EQ = 0
             * Swapping MP_GT and MP_LT results.
             */
            ret = -ret;
        }
    }
#endif

    return ret;
}
  5233. #endif
  5234. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5235. !defined(NO_DSA) || defined(HAVE_ECC) || !defined(NO_DH) || \
  5236. defined(WOLFSSL_SP_MATH_ALL)
  5237. /* Compare two multi-precision numbers.
  5238. *
  5239. * Pointers are compared such that NULL is less than not NULL.
  5240. *
  5241. * @param [in] a SP integer.
  5242. * @param [in] a SP integer.
  5243. *
  5244. * @return MP_GT when a is greater than b.
  5245. * @return MP_LT when a is less than b.
  5246. * @return MP_EQ when a is equals b.
  5247. */
  5248. int sp_cmp(const sp_int* a, const sp_int* b)
  5249. {
  5250. int ret;
  5251. /* Check pointers first. Both NULL returns equal. */
  5252. if (a == b) {
  5253. ret = MP_EQ;
  5254. }
  5255. /* Nothing is smaller than something. */
  5256. else if (a == NULL) {
  5257. ret = MP_LT;
  5258. }
  5259. /* Something is larger than nothing. */
  5260. else if (b == NULL) {
  5261. ret = MP_GT;
  5262. }
  5263. else
  5264. {
  5265. /* Compare values - a and b are not NULL. */
  5266. ret = _sp_cmp(a, b);
  5267. }
  5268. return ret;
  5269. }
  5270. #endif
  5271. /*************************
  5272. * Bit check/set functions
  5273. *************************/
  5274. #if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5275. (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)) || \
  5276. defined(OPENSSL_EXTRA)
  5277. /* Check if a bit is set
  5278. *
  5279. * When a is NULL, result is 0.
  5280. *
  5281. * @param [in] a SP integer.
  5282. * @param [in] b Bit position to check.
  5283. *
  5284. * @return 0 when bit is not set.
  5285. * @return 1 when bit is set.
  5286. */
  5287. int sp_is_bit_set(const sp_int* a, unsigned int b)
  5288. {
  5289. int ret = 0;
  5290. /* Index of word. */
  5291. unsigned int i = b >> SP_WORD_SHIFT;
  5292. /* Check parameters. */
  5293. if ((a != NULL) && (i < a->used)) {
  5294. /* Shift amount to get bit down to index 0. */
  5295. unsigned int s = b & SP_WORD_MASK;
  5296. /* Get and mask bit. */
  5297. ret = (int)((a->dp[i] >> s) & (sp_int_digit)1);
  5298. }
  5299. return ret;
  5300. }
  5301. #endif /* (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5302. * (WOLFSSL_SP_MATH_ALL && HAVE_ECC) */
/* Count the number of bits in the multi-precision number.
 *
 * When a is NULL, result is 0.
 *
 * @param [in] a  SP integer.
 *
 * @return Number of bits in the SP integer value.
 */
int sp_count_bits(const sp_int* a)
{
    int n = -1;

    /* Check parameter. */
    if ((a != NULL) && (a->used > 0)) {
        /* Get index of last word. */
        n = (int)(a->used - 1);
        /* Don't count leading zeros. */
        while ((n >= 0) && (a->dp[n] == 0)) {
            n--;
        }
    }

    /* -1 indicates SP integer value was zero. */
    if (n < 0) {
        n = 0;
    }
    else {
        /* Get the most significant word. */
        sp_int_digit d = a->dp[n];
        /* Count of bits up to last word. */
        n *= SP_WORD_SIZE;

#ifdef SP_ASM_HI_BIT_SET_IDX
        {
            sp_int_digit hi;
            /* Get index of highest set bit via assembly macro. */
            SP_ASM_HI_BIT_SET_IDX(d, hi);
            /* Add bits up to and including index. */
            n += (int)hi + 1;
        }
#elif defined(SP_ASM_LZCNT)
        {
            sp_int_digit lz;
            /* Count number of leading zeros in highest non-zero digit. */
            SP_ASM_LZCNT(d, lz);
            /* Add non-leading zero bits count. */
            n += SP_WORD_SIZE - (int)lz;
        }
#else
        /* Portable fallback: shift in whichever direction needs fewer
         * iterations for this value. */
        /* Check if top word has more than half the bits set. */
        if (d > SP_HALF_MAX) {
            /* Set count to a full last word. */
            n += SP_WORD_SIZE;
            /* Don't count leading zero bits. */
            while ((d & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) == 0) {
                n--;
                d <<= 1;
            }
        }
        else {
            /* Add to count until highest set bit is shifted out. */
            while (d != 0) {
                n++;
                d >>= 1;
            }
        }
#endif
    }

    return n;
}
  5370. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5371. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  5372. (defined(HAVE_ECC) && defined(FP_ECC)) || \
  5373. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
/* Number of entries in array of number of least significant zero bits. */
#define SP_LNZ_CNT      16
/* Number of bits the array checks. */
#define SP_LNZ_BITS     4
/* Mask to apply to check with array. */
#define SP_LNZ_MASK     0xf
/* Number of least significant zero bits in first SP_LNZ_CNT numbers.
 * Index is a 4-bit value; entry is the count of trailing zero bits
 * (4 for the value 0, meaning "all four bits are zero"). */
static const int sp_lnz[SP_LNZ_CNT] = {
    4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};

/* Count the number of least significant zero bits.
 *
 * When a is NULL, result is 0.
 *
 * @param [in] a  SP integer to use.
 *
 * @return Number of least significant zero bits.
 */
#if !defined(HAVE_ECC) || !defined(HAVE_COMP_KEY)
static
#endif /* !HAVE_ECC || HAVE_COMP_KEY */
int sp_cnt_lsb(const sp_int* a)
{
    unsigned int bc = 0;

    /* Check for number with a value - zero (and NULL) returns 0. */
    if ((a != NULL) && (!sp_iszero(a))) {
        unsigned int i;
        unsigned int j;

        /* Count least significant words that are zero. Loop terminates
         * before i == a->used because a is known to be non-zero. */
        for (i = 0; i < a->used && a->dp[i] == 0; i++, bc += SP_WORD_SIZE) {
        }

        /* Use 4-bit table to get count within the first non-zero word. */
        for (j = 0; j < SP_WORD_SIZE; j += SP_LNZ_BITS) {
            /* Get number of least significant 0 bits in nibble. */
            int cnt = sp_lnz[(a->dp[i] >> j) & SP_LNZ_MASK];
            /* Done if not all 4 bits are zero. */
            if (cnt != 4) {
                /* Add checked bits and count in last 4 bits checked. */
                bc += j + (unsigned int)cnt;
                break;
            }
        }
    }

    return (int)bc;
}
  5419. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || (HAVE_ECC && FP_ECC) */
  5420. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_ASN_TEMPLATE) || \
  5421. (defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_ASN))
  5422. /* Determine if the most significant byte of the encoded multi-precision number
  5423. * has the top bit set.
  5424. *
  5425. * When a is NULL, result is 0.
  5426. *
  5427. * @param [in] a SP integer.
  5428. *
  5429. * @return 1 when the top bit of top byte is set.
  5430. * @return 0 when the top bit of top byte is not set.
  5431. */
  5432. int sp_leading_bit(const sp_int* a)
  5433. {
  5434. int bit = 0;
  5435. /* Check if we have a number and value to use. */
  5436. if ((a != NULL) && (a->used > 0)) {
  5437. /* Get top word. */
  5438. sp_int_digit d = a->dp[a->used - 1];
  5439. #if SP_WORD_SIZE > 8
  5440. /* Remove bottom 8 bits until highest 8 bits left. */
  5441. while (d > (sp_int_digit)0xff) {
  5442. d >>= 8;
  5443. }
  5444. #endif
  5445. /* Get the highest bit of the 8-bit value. */
  5446. bit = (int)(d >> 7);
  5447. }
  5448. return bit;
  5449. }
  5450. #endif /* !WOLFSSL_RSA_VERIFY_ONLY */
  5451. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  5452. defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) || defined(OPENSSL_EXTRA) || \
  5453. !defined(NO_RSA)
  5454. /* Set one bit of a: a |= 1 << i
  5455. * The field 'used' is updated in a.
  5456. *
  5457. * @param [in,out] a SP integer to set bit into.
  5458. * @param [in] i Index of bit to set.
  5459. *
  5460. * @return MP_OKAY on success.
  5461. * @return MP_VAL when a is NULL, index is negative or index is too large.
  5462. */
  5463. int sp_set_bit(sp_int* a, int i)
  5464. {
  5465. int err = MP_OKAY;
  5466. /* Get index of word to set. */
  5467. unsigned int w = (unsigned int)(i >> SP_WORD_SHIFT);
  5468. /* Check for valid number and and space for bit. */
  5469. if ((a == NULL) || (i < 0) || (w >= a->size)) {
  5470. err = MP_VAL;
  5471. }
  5472. if (err == MP_OKAY) {
  5473. /* Amount to shift up to set bit in word. */
  5474. unsigned int s = (unsigned int)(i & (SP_WORD_SIZE - 1));
  5475. unsigned int j;
  5476. /* Set to zero all unused words up to and including word to have bit
  5477. * set.
  5478. */
  5479. for (j = a->used; j <= w; j++) {
  5480. a->dp[j] = 0;
  5481. }
  5482. /* Set bit in word. */
  5483. a->dp[w] |= (sp_int_digit)1 << s;
  5484. /* Update used if necessary */
  5485. if (a->used <= w) {
  5486. a->used = w + 1;
  5487. }
  5488. }
  5489. return err;
  5490. }
  5491. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  5492. * WOLFSSL_KEY_GEN || OPENSSL_EXTRA || !NO_RSA */
  5493. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5494. defined(WOLFSSL_KEY_GEN) || !defined(NO_DH)
  5495. /* Exponentiate 2 to the power of e: a = 2^e
  5496. * This is done by setting the 'e'th bit.
  5497. *
  5498. * @param [out] a SP integer to hold result.
  5499. * @param [in] e Exponent.
  5500. *
  5501. * @return MP_OKAY on success.
  5502. * @return MP_VAL when a is NULL, e is negative or 2^exponent is too large.
  5503. */
  5504. int sp_2expt(sp_int* a, int e)
  5505. {
  5506. int err = MP_OKAY;
  5507. /* Validate parameters. */
  5508. if ((a == NULL) || (e < 0)) {
  5509. err = MP_VAL;
  5510. }
  5511. if (err == MP_OKAY) {
  5512. /* Set number to zero and then set bit. */
  5513. _sp_zero(a);
  5514. err = sp_set_bit(a, e);
  5515. }
  5516. return err;
  5517. }
  5518. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5519. * WOLFSSL_KEY_GEN || !NO_DH */
  5520. /**********************
  5521. * Digit/Long functions
  5522. **********************/
  5523. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || !defined(NO_DH) || \
  5524. defined(HAVE_ECC)
  5525. /* Set the multi-precision number to be the value of the digit.
  5526. *
  5527. * @param [out] a SP integer to become number.
  5528. * @param [in] d Digit to be set.
  5529. */
  5530. static void _sp_set(sp_int* a, sp_int_digit d)
  5531. {
  5532. /* Use sp_int_minimal to support allocated byte arrays as sp_ints. */
  5533. sp_int_minimal* am = (sp_int_minimal*)a;
  5534. am->dp[0] = d;
  5535. /* d == 0 => used = 0, d > 0 => used = 1 */
  5536. am->used = (d > 0);
  5537. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5538. am->sign = MP_ZPOS;
  5539. #endif
  5540. }
  5541. /* Set the multi-precision number to be the value of the digit.
  5542. *
  5543. * @param [out] a SP integer to become number.
  5544. * @param [in] d Digit to be set.
  5545. *
  5546. * @return MP_OKAY on success.
  5547. * @return MP_VAL when a is NULL.
  5548. */
  5549. int sp_set(sp_int* a, sp_int_digit d)
  5550. {
  5551. int err = MP_OKAY;
  5552. /* Validate parameters. */
  5553. if (a == NULL) {
  5554. err = MP_VAL;
  5555. }
  5556. if (err == MP_OKAY) {
  5557. _sp_set(a, d);
  5558. }
  5559. return err;
  5560. }
  5561. #endif
  5562. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_RSA) || defined(OPENSSL_EXTRA)
/* Set a number into the multi-precision number.
 *
 * Number may be larger than the size of a digit.
 *
 * @param [out] a  SP integer to set.
 * @param [in]  n  Long value to set.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a is NULL or n is too large for a.
 */
int sp_set_int(sp_int* a, unsigned long n)
{
    int err = MP_OKAY;

    if (a == NULL) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
    #if SP_WORD_SIZE < SP_ULONG_BITS
        /* Assign directly when the value fits in one digit. */
        if (n <= (sp_int_digit)SP_DIGIT_MAX) {
    #endif
            a->dp[0] = (sp_int_digit)n;
            /* Zero uses no digits; anything else uses one. */
            a->used = (n != 0);
    #if SP_WORD_SIZE < SP_ULONG_BITS
        }
        else {
            unsigned int i;

            /* Assign value word by word, least significant first. */
            for (i = 0; (i < a->size) && (n > 0); i++,n >>= SP_WORD_SIZE) {
                a->dp[i] = (sp_int_digit)n;
            }
            /* Update number of words used. */
            a->used = i;
            /* Check for overflow - capacity exhausted with bits left. */
            if ((i == a->size) && (n != 0)) {
                err = MP_VAL;
            }
        }
    #endif
    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* An unsigned long is always non-negative. */
        a->sign = MP_ZPOS;
    #endif
    }

    return err;
}
  5608. #endif /* WOLFSSL_SP_MATH_ALL || !NO_RSA */
  5609. #if defined(WOLFSSL_SP_MATH_ALL) || \
  5610. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5611. !defined(NO_DH) || defined(HAVE_ECC)
/* Compare a one digit number with a multi-precision number.
 *
 * When a is NULL, MP_LT is returned.
 *
 * @param [in] a  SP integer to compare.
 * @param [in] d  Digit to compare with.
 *
 * @return MP_GT when a is greater than d.
 * @return MP_LT when a is less than d.
 * @return MP_EQ when a equals d.
 */
int sp_cmp_d(const sp_int* a, sp_int_digit d)
{
    int ret = MP_EQ;

    /* No SP integer is always less - even when d is zero. */
    if (a == NULL) {
        ret = MP_LT;
    }
    else
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check sign first - d is unsigned so any negative a is smaller. */
    if (a->sign == MP_NEG) {
        ret = MP_LT;
    }
    else
#endif
    {
        /* Check if SP integer has more than one word - must exceed d. */
        if (a->used > 1) {
            ret = MP_GT;
        }
        /* Special case for zero (used == 0). */
        else if (a->used == 0) {
            if (d != 0) {
                ret = MP_LT;
            }
            /* ret initialized to equal. */
        }
        else {
            /* The single word in the SP integer can now be compared with d. */
            if (a->dp[0] > d) {
                ret = MP_GT;
            }
            else if (a->dp[0] < d) {
                ret = MP_LT;
            }
            /* ret initialized to equal. */
        }
    }

    return ret;
}
  5663. #endif
  5664. #if defined(WOLFSSL_SP_ADD_D) || (defined(WOLFSSL_SP_INT_NEGATIVE) && \
  5665. defined(WOLFSSL_SP_SUB_D)) || defined(WOLFSSL_SP_READ_RADIX_10)
/* Add a one digit number to the multi-precision number.
 *
 * Assumes a and r are not NULL and r has space for a->used (+1 on carry)
 * digits. a and r may be the same object.
 *
 * @param [in]  a  SP integer be added to.
 * @param [in]  d  Digit to add.
 * @param [out] r  SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when result is too large for fixed size dp array.
 */
static int _sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Special case of zero means we want result to have a digit when not
     * adding zero. */
    if (a->used == 0) {
        r->dp[0] = d;
        r->used = (d > 0);
    }
    else {
        unsigned int i = 0;
        sp_int_digit a0 = a->dp[0];

        /* Set used of result - updated if overflow seen. */
        r->used = a->used;

        r->dp[0] = a0 + d;
        /* Check for carry - unsigned wrap means the sum is below a0. */
        if (r->dp[0] < a0) {
            /* Do carry through all words - stop at first word that does not
             * itself wrap to zero. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i] + 1;
                if (r->dp[i] != 0) {
                    break;
                }
            }
            /* Add another word if required - carry propagated off the top. */
            if (i == a->used) {
                /* Check result has enough space for another word. */
                if (i < r->size) {
                    r->used++;
                    r->dp[i] = 1;
                }
                else {
                    err = MP_VAL;
                }
            }
        }
        /* When result is not the same as input, copy rest of digits.
         * i still indexes the last word written above. */
        if ((err == MP_OKAY) && (r != a)) {
            /* Copy any words that didn't update with carry. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
    }

    return err;
}
  5721. #endif /* WOLFSSL_SP_ADD_D || (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_SUB_D) ||
  5722. * defined(WOLFSSL_SP_READ_RADIX_10) */
  5723. #if (defined(WOLFSSL_SP_INT_NEGATIVE) && defined(WOLFSSL_SP_ADD_D)) || \
  5724. defined(WOLFSSL_SP_SUB_D) || defined(WOLFSSL_SP_INVMOD) || \
  5725. defined(WOLFSSL_SP_INVMOD_MONT_CT) || (defined(WOLFSSL_SP_PRIME_GEN) && \
  5726. !defined(WC_NO_RNG))
/* Sub a one digit number from the multi-precision number.
 *
 * Assumes a >= d (callers ensure this; there is no negative support here)
 * and that a and r are not NULL. a and r may be the same object.
 *
 * @param [in]  a  SP integer be subtracted from.
 * @param [in]  d  Digit to subtract.
 * @param [out] r  SP integer to store result in.
 */
static void _sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    /* Set result used to be same as input. Updated with clamp. */
    r->used = a->used;
    /* Only possible when not handling negatives. */
    if (a->used == 0) {
        /* Set result to zero as no negative support. */
        r->dp[0] = 0;
    }
    else {
        unsigned int i = 0;
        sp_int_digit a0 = a->dp[0];

        r->dp[0] = a0 - d;
        /* Check for borrow - unsigned wrap means the difference exceeds a0. */
        if (r->dp[0] > a0) {
            /* Do borrow through all words - stop at first word that does not
             * itself wrap to SP_DIGIT_MAX. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i] - 1;
                if (r->dp[i] != SP_DIGIT_MAX) {
                    break;
                }
            }
        }
        /* When result is not the same as input, copy rest of digits.
         * i still indexes the last word written above. */
        if (r != a) {
            /* Copy any words that didn't update with borrow. */
            for (++i; i < a->used; i++) {
                r->dp[i] = a->dp[i];
            }
        }
        /* Remove leading zero words. */
        sp_clamp(r);
    }
}
  5767. #endif /* (WOLFSSL_SP_INT_NEGATIVE && WOLFSSL_SP_ADD_D) || WOLFSSL_SP_SUB_D
  5768. * WOLFSSL_SP_INVMOD || WOLFSSL_SP_INVMOD_MONT_CT ||
  5769. * WOLFSSL_SP_PRIME_GEN */
  5770. #ifdef WOLFSSL_SP_ADD_D
/* Add a one digit number to the multi-precision number.
 *
 * @param [in]  a  SP integer be added to.
 * @param [in]  d  Digit to add.
 * @param [out] r  SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when result is too large for fixed size dp array.
 */
int sp_add_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Check for space in result especially when carry adds a new word. */
    if ((err == MP_OKAY) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Positive only so just use internal function. */
        err = _sp_add_d(a, d, r);
    }
#else
    /* Check for space in result especially when carry adds a new word. */
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    /* Check for space in result - no carry but borrow possible. */
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        if (a->sign == MP_ZPOS) {
            /* Positive, so use internal function. */
            r->sign = MP_ZPOS;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] > d)) {
            /* Negative value with magnitude bigger than digit: result stays
             * negative; subtract digit from magnitude. */
            r->sign = MP_NEG;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Negative value with magnitude smaller than or equal to digit:
             * result is non-negative. */
            r->sign = MP_ZPOS;
            /* Subtract negative value's magnitude from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a digit equal to or greater than zero. */
            r->used = (r->dp[0] > 0);
        }
    }
#endif

    return err;
}
  5828. #endif /* WOLFSSL_SP_ADD_D */
  5829. #ifdef WOLFSSL_SP_SUB_D
/* Sub a one digit number from the multi-precision number.
 *
 * @param [in]  a  SP integer be subtracted from.
 * @param [in]  d  Digit to subtract.
 * @param [out] r  SP integer to store result in.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a or r is NULL.
 */
int sp_sub_d(const sp_int* a, sp_int_digit d, sp_int* r)
{
    int err = MP_OKAY;

    /* Check validity of parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Check for space in result. */
    if ((err == MP_OKAY) && (a->used > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Positive only so just use internal function. */
        _sp_sub_d(a, d, r);
    }
#else
    /* Check for space in result especially when borrow adds a new word. */
    if ((err == MP_OKAY) && (a->sign == MP_NEG) && (a->used + 1 > r->size)) {
        err = MP_VAL;
    }
    /* Check for space in result - no carry but borrow possible. */
    if ((err == MP_OKAY) && (a->sign == MP_ZPOS) && (a->used > r->size)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        if (a->sign == MP_NEG) {
            /* Subtracting a digit from a negative grows the magnitude:
             * use internal add. */
            r->sign = MP_NEG;
            err = _sp_add_d(a, d, r);
        }
        else if ((a->used > 1) || (a->dp[0] >= d)) {
            /* Positive number greater than or equal to digit - subtract digit.
             */
            r->sign = MP_ZPOS;
            _sp_sub_d(a, d, r);
        }
        else {
            /* Positive value smaller than digit - result goes negative. */
            r->sign = MP_NEG;
            /* Subtract positive value from digit. */
            r->dp[0] = d - a->dp[0];
            /* Result is a single non-zero digit (d > a->dp[0] in this
             * branch), so exactly one word is used. */
            r->used = 1;
        }
    }
#endif

    return err;
}
  5888. #endif /* WOLFSSL_SP_SUB_D */
  5889. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5890. defined(WOLFSSL_SP_SMALL) && (defined(WOLFSSL_SP_MATH_ALL) || \
  5891. !defined(NO_DH) || defined(HAVE_ECC) || \
  5892. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  5893. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  5894. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
/* Multiply a by digit d and put result into r shifting up o digits.
 * r = (a * d) << (o * SP_WORD_SIZE)
 *
 * @param [in]  a  SP integer to be multiplied.
 * @param [in]  d  SP digit to multiply by.
 * @param [out] r  SP integer result.
 * @param [in]  o  Number of digits to move result up by.
 * @return MP_OKAY on success.
 * @return MP_VAL when result is too large for sp_int.
 */
static int _sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r, unsigned int o)
{
    int err = MP_OKAY;
    unsigned int i;
#ifndef SQR_MUL_ASM
    /* Running word: product plus carry from previous iteration. */
    sp_int_word t = 0;
#else
    /* Low and high digits of the running product. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif
#ifdef WOLFSSL_SP_SMALL
    /* Zero out offset words. */
    for (i = 0; i < o; i++) {
        r->dp[i] = 0;
    }
#else
    /* Don't use the offset. Only used when doing small code size division. */
    (void)o;
#endif
    /* Multiply each word of a by d. */
    for (i = 0; i < a->used; i++, o++) {
#ifndef SQR_MUL_ASM
        /* Add product to top word of previous result. */
        t += (sp_int_word)a->dp[i] * d;
        /* Store low word. */
        r->dp[o] = (sp_int_digit)t;
        /* Move top word down. */
        t >>= SP_WORD_SIZE;
#else
        /* Multiply and add into low and high from previous result.
         * No overflow possible with the add. */
        SP_ASM_MUL_ADD_NO(l, h, a->dp[i], d);
        /* Store low word. */
        r->dp[o] = l;
        /* Move high word into low word and set high word to 0. */
        l = h;
        h = 0;
#endif
    }
    /* Check whether a new word is to be appended to the result. */
#ifndef SQR_MUL_ASM
    if (t > 0)
#else
    if (l > 0)
#endif
    {
        /* Validate space available in result. */
        if (o == r->size) {
            err = MP_VAL;
        }
        else {
            /* Store new top word. */
#ifndef SQR_MUL_ASM
            r->dp[o++] = (sp_int_digit)t;
#else
            r->dp[o++] = l;
#endif
        }
    }
    /* Update number of words in result. */
    r->used = o;
    /* In case d is zero. */
    sp_clamp(r);
    return err;
}
  5970. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  5971. * WOLFSSL_SP_SMALL || (WOLFSSL_KEY_GEN && !NO_RSA) */
  5972. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  5973. (defined(WOLFSSL_KEY_GEN) && !defined(NO_RSA))
  5974. /* Multiply a by digit n and put result into r. r = a * n
  5975. *
  5976. * @param [in] a SP integer to multiply.
  5977. * @param [in] n Digit to multiply by.
  5978. * @param [out] r SP integer to hold result.
  5979. *
  5980. * @return MP_OKAY on success.
  5981. * @return MP_VAL when a or b is NULL, or a has maximum number of digits used.
  5982. */
  5983. int sp_mul_d(const sp_int* a, sp_int_digit d, sp_int* r)
  5984. {
  5985. int err = MP_OKAY;
  5986. /* Validate parameters. */
  5987. if ((a == NULL) || (r == NULL)) {
  5988. err = MP_VAL;
  5989. }
  5990. /* Check space for product result - _sp_mul_d checks when new word added. */
  5991. if ((err == MP_OKAY) && (a->used > r->size)) {
  5992. err = MP_VAL;
  5993. }
  5994. if (err == MP_OKAY) {
  5995. err = _sp_mul_d(a, d, r, 0);
  5996. #ifdef WOLFSSL_SP_INT_NEGATIVE
  5997. /* Update sign. */
  5998. if (d == 0) {
  5999. r->sign = MP_ZPOS;
  6000. }
  6001. else {
  6002. r->sign = a->sign;
  6003. }
  6004. #endif
  6005. }
  6006. return err;
  6007. }
  6008. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) ||
  6009. * (WOLFSSL_KEY_GEN && !NO_RSA) */
  6010. /* Predefine complicated rules of when to compile in sp_div_d and sp_mod_d. */
  6011. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6012. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
  6013. defined(OPENSSL_EXTRA) || defined(WC_MP_TO_RADIX)
  6014. #define WOLFSSL_SP_DIV_D
  6015. #endif
  6016. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  6017. !defined(NO_DH) || \
  6018. (defined(HAVE_ECC) && (defined(FP_ECC) || defined(HAVE_COMP_KEY))) || \
  6019. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN))
  6020. #define WOLFSSL_SP_MOD_D
  6021. #endif
  6022. #if (defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  6023. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6024. !defined(WOLFSSL_RSA_PUBLIC_ONLY))) || \
  6025. defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
  6026. #ifndef SP_ASM_DIV_WORD
/* Divide a two digit number by a digit number and return. (hi | lo) / d
 *
 * @param [in] hi  SP integer digit. High digit of the dividend.
 * @param [in] lo  SP integer digit. Low digit of the dividend.
 * @param [in] d   SP integer digit. Number to divide by.
 * @return The division result.
 */
static WC_INLINE sp_int_digit sp_div_word(sp_int_digit hi, sp_int_digit lo,
                                          sp_int_digit d)
{
#ifdef WOLFSSL_SP_DIV_WORD_HALF
    sp_int_digit r;

    /* Trial division using half of the bits in d. */
    /* Check for shortcut when no high word set. */
    if (hi == 0) {
        r = lo / d;
    }
    else {
        /* Top half of the bits of d. */
        sp_int_digit divh = d >> SP_HALF_SIZE;
        /* Number to divide in one value. */
        sp_int_word w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
        sp_int_word trial;
        sp_int_digit r2;

        /* Calculation for top SP_WORD_SIZE / 2 bits of dividend. */
        /* Divide high word by top half of divisor. */
        r = hi / divh;
        /* When result too big then assume only max value. */
        if (r > SP_HALF_MAX) {
            r = SP_HALF_MAX;
        }
        /* Shift up result for trial division calculation. */
        r <<= SP_HALF_SIZE;
        /* Calculate trial value. */
        trial = r * (sp_int_word)d;
        /* Decrease r while trial is too big. */
        while (trial > w) {
            r -= (sp_int_digit)1 << SP_HALF_SIZE;
            trial -= (sp_int_word)d << SP_HALF_SIZE;
        }
        /* Subtract trial. */
        w -= trial;

        /* Calculation for remaining second SP_WORD_SIZE / 2 bits. */
        /* Divide top SP_WORD_SIZE bits of remainder by top half of divisor. */
        r2 = ((sp_int_digit)(w >> SP_HALF_SIZE)) / divh;
        /* Calculate trial value. */
        trial = r2 * (sp_int_word)d;
        /* Decrease r2 while trial is too big. */
        while (trial > w) {
            r2--;
            trial -= d;
        }
        /* Subtract trial. */
        w -= trial;
        /* Update result. */
        r += r2;

        /* Calculation for remaining bottom SP_WORD_SIZE bits. */
        r2 = ((sp_int_digit)w) / d;
        /* Update result. */
        r += r2;
    }

    return r;
#else
    sp_int_word w;
    sp_int_digit r;

    /* Use built-in double-width divide. */
    w = ((sp_int_word)hi << SP_WORD_SIZE) | lo;
    w /= d;
    r = (sp_int_digit)w;

    return r;
#endif /* WOLFSSL_SP_DIV_WORD_HALF */
}
  6099. #endif /* !SP_ASM_DIV_WORD */
  6100. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  6101. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6102. #if (defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)) && \
  6103. !defined(WOLFSSL_SP_SMALL)
  6104. #if SP_WORD_SIZE == 64
  6105. /* 2^64 / 3 */
  6106. #define SP_DIV_3_CONST 0x5555555555555555L
  6107. /* 2^64 / 10 */
  6108. #define SP_DIV_10_CONST 0x1999999999999999L
  6109. #elif SP_WORD_SIZE == 32
  6110. /* 2^32 / 3 */
  6111. #define SP_DIV_3_CONST 0x55555555
  6112. /* 2^32 / 10 */
  6113. #define SP_DIV_10_CONST 0x19999999
  6114. #elif SP_WORD_SIZE == 16
  6115. /* 2^16 / 3 */
  6116. #define SP_DIV_3_CONST 0x5555
  6117. /* 2^16 / 10 */
  6118. #define SP_DIV_10_CONST 0x1999
  6119. #elif SP_WORD_SIZE == 8
  6120. /* 2^8 / 3 */
  6121. #define SP_DIV_3_CONST 0x55
  6122. /* 2^8 / 10 */
  6123. #define SP_DIV_10_CONST 0x19
  6124. #endif
  6125. #if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE < 64)
/* Divide by 3: r = a / 3 and rem = a % 3
 *
 * Used in checking prime: (a % 3) == 0?
 *
 * @param [in]  a    SP integer to be divided.
 * @param [out] r    SP integer that is the quotient. May be NULL.
 * @param [out] rem  SP integer that is the remainder. May be NULL.
 */
static void _sp_div_3(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_3_CONST;
    sp_int_digit lm = 0;
    sp_int_digit hm = 0;
#endif
    sp_int_digit tr = 0;
    /* Quotient fixup. */
    static const unsigned char sp_r6[6] = { 0, 0, 0, 1, 1, 1 };
    /* Remainder fixup. */
    static const unsigned char sp_rem6[6] = { 0, 1, 2, 0, 1, 2 };

    /* Check whether only mod value needed. */
    if (r == NULL) {
        unsigned int i;
        /* 2^2 mod 3 = 4 mod 3 = 1.
         * => 2^(2*n) mod 3 = (2^2 mod 3)^n mod 3 = 1^n mod 3 = 1
         * => (2^(2*n) * x) mod 3 = (2^(2*n) mod 3) * (x mod 3) = x mod 3
         *
         * Calculate mod 3 on sum of digits as SP_WORD_SIZE is a multiple
         * of 2.
         */
#ifndef SQR_MUL_ASM
        t = 0;
        /* Sum the digits. */
        for (i = 0; i < a->used; i++) {
            t += a->dp[i];
        }
        /* Sum digits of sum. */
        t = (t >> SP_WORD_SIZE) + (t & SP_MASK);
        /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
        tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
        /* Subtract trial division. */
        tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
        /* Sum the digits. */
        for (i = 0; i < a->used; i++) {
            SP_ASM_ADDC_REG(l, tr, a->dp[i]);
        }
        /* Sum digits of sum - can get carry. */
        SP_ASM_ADDC_REG(l, tt, tr);
        /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
        SP_ASM_MUL(lm, hm, l, t);
        /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top digit. */
        hm += tt * SP_DIV_3_CONST;
        /* Subtract trial division from digit. */
        tr = l - (hm * 3);
#endif
        /* tr is 0..5 but need 0..2 */
        /* Fix up remainder. */
        tr = sp_rem6[tr];
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        int i;

        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 3. */
            tt = (sp_int_digit)((t * SP_DIV_3_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 3);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 3. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 3 to top
             * digit. */
            tt += tr * SP_DIV_3_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 3);
#endif
            /* tr is 0..5 but need 0..2 */
            /* Fix up result. */
            tt += sp_r6[tr];
            /* Fix up remainder. */
            tr = sp_rem6[tr];
            /* Store result of digit divided by 3. */
            r->dp[i] = tt;
        }
        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6229. #endif /* !(WOLFSSL_SP_SMALL && (SP_WORD_SIZE < 64) */
/* Divide by 10: r = a / 10 and rem = a % 10
 *
 * Used when writing with a radix of 10 - decimal number.
 *
 * @param [in]  a    SP integer to be divided.
 * @param [out] r    SP integer that is the quotient. May be NULL.
 * @param [out] rem  SP integer that is the remainder. May be NULL.
 */
static void _sp_div_10(const sp_int* a, sp_int* r, sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
    sp_int_digit t = SP_DIV_10_CONST;
#endif
    sp_int_digit tr = 0;

    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top
             * digit. */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
#endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up remainder. */
            tr = tr % 10;
        }
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying by (2^SP_WORD_SIZE) / 10. */
            tt = (sp_int_digit)((t * SP_DIV_10_CONST) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)(t - (sp_int_word)tt * 10);
#else
            /* Multiply digit by (2^SP_WORD_SIZE) / 10. */
            SP_ASM_MUL(l, tt, a->dp[i], t);
            /* Add remainder multiplied by (2^SP_WORD_SIZE) / 10 to top
             * digit. */
            tt += tr * SP_DIV_10_CONST;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * 10);
#endif
            /* tr is 0..99 but need 0..9 */
            /* Fix up result. */
            tt += tr / 10;
            /* Fix up remainder. */
            tr %= 10;
            /* Store result of digit divided by 10. */
            r->dp[i] = tt;
        }
        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6314. #endif /* (WOLFSSL_SP_DIV_D || WOLFSSL_SP_MOD_D) && !WOLFSSL_SP_SMALL */
  6315. #if defined(WOLFSSL_SP_DIV_D) || defined(WOLFSSL_SP_MOD_D)
/* Divide by small number: r = a / d and rem = a % d
 *
 * @param [in]  a    SP integer to be divided.
 * @param [in]  d    Digit to divide by.
 * @param [out] r    SP integer that is the quotient. May be NULL.
 * @param [out] rem  SP integer that is the remainder. May be NULL.
 */
static void _sp_div_small(const sp_int* a, sp_int_digit d, sp_int* r,
                          sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word t;
    sp_int_digit tt;
#else
    sp_int_digit l = 0;
    sp_int_digit tt = 0;
#endif
    sp_int_digit tr = 0;
    /* Multiplier approximating (2^SP_WORD_SIZE) / d for trial division. */
    sp_int_digit m = SP_DIGIT_MAX / d;

#ifndef WOLFSSL_SP_SMALL
    /* Check whether only mod value needed. */
    if (r == NULL) {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
#else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
#endif
            /* tr < d * d */
            /* Fix up remainder. */
            tr = tr % d;
        }
        *rem = tr;
    }
    /* At least result needed - remainder is calculated anyway. */
    else
#endif /* !WOLFSSL_SP_SMALL */
    {
        /* Divide starting at most significant word down to least. */
        for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
            /* Combine remainder from last operation with this word. */
            t = ((sp_int_word)tr << SP_WORD_SIZE) | a->dp[i];
            /* Get top digit after multiplying. */
            tt = (sp_int_digit)((t * m) >> SP_WORD_SIZE);
            /* Subtract trial division. */
            tr = (sp_int_digit)t - (sp_int_digit)(tt * d);
#else
            /* Multiply digit. */
            SP_ASM_MUL(l, tt, a->dp[i], m);
            /* Add multiplied remainder to top digit. */
            tt += tr * m;
            /* Subtract trial division from digit. */
            tr = a->dp[i] - (tt * d);
#endif
            /* tr < d * d */
            /* Fix up result. */
            tt += tr / d;
            /* Fix up remainder. */
            tr %= d;
            /* Store result of dividing the digit. */
#ifdef WOLFSSL_SP_SMALL
            if (r != NULL)
#endif
            {
                r->dp[i] = tt;
            }
        }
#ifdef WOLFSSL_SP_SMALL
        if (r != NULL)
#endif
        {
            /* Set the used amount to maximal amount. */
            r->used = a->used;
            /* Remove leading zeros. */
            sp_clamp(r);
        }
        /* Return remainder if required. */
        if (rem != NULL) {
            *rem = tr;
        }
    }
}
  6411. #endif
  6412. #ifdef WOLFSSL_SP_DIV_D
/* Divide a multi-precision number by a digit size number and calculate
 * remainder.
 * r = a / d; rem = a % d
 *
 * Uses trial division algorithm one word at a time.
 *
 * @param [in]  a    SP integer to be divided.
 * @param [in]  d    Digit to divide by.
 * @param [out] r    SP integer that is the quotient. May be NULL.
 * @param [out] rem  Digit that is the remainder. May be NULL.
 */
static void _sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r,
                      sp_int_digit* rem)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word w = 0;
#else
    sp_int_digit l;
    sp_int_digit h = 0;
#endif
    sp_int_digit t;

    /* Divide starting at most significant word down to least. */
    for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
        /* Combine remainder from last operation with this word and divide. */
        t = sp_div_word((sp_int_digit)w, a->dp[i], d);
        /* Combine remainder from last operation with this word. */
        w = (w << SP_WORD_SIZE) | a->dp[i];
        /* Subtract to get modulo result. */
        w -= (sp_int_word)t * d;
#else
        /* Get current word. */
        l = a->dp[i];
        /* Combine remainder from last operation with this word and divide. */
        t = sp_div_word(h, l, d);
        /* Subtract to get modulo result. */
        h = l - t * d;
#endif
        /* Store result of dividing the digit. */
        if (r != NULL) {
            r->dp[i] = t;
        }
    }
    if (r != NULL) {
        /* Set the used amount to maximal amount. */
        r->used = a->used;
        /* Remove leading zeros. */
        sp_clamp(r);
    }
    /* Return remainder if required. */
    if (rem != NULL) {
#ifndef SQR_MUL_ASM
        *rem = (sp_int_digit)w;
#else
        *rem = h;
#endif
    }
}
/* Divide a multi-precision number by a digit size number and calculate
 * remainder.
 * r = a / d; rem = a % d
 *
 * @param [in]  a    SP integer to be divided.
 * @param [in]  d    Digit to divide by.
 * @param [out] r    SP integer that is the quotient. May be NULL.
 * @param [out] rem  Digit that is the remainder. May be NULL.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a is NULL or d is 0.
 */
int sp_div_d(const sp_int* a, sp_int_digit d, sp_int* r, sp_int_digit* rem)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (d == 0)) {
        err = MP_VAL;
    }
    /* Check space for maximal sized result. */
    if ((err == MP_OKAY) && (r != NULL) && (a->used > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Dispatch to the fastest implementation for the divisor. */
#if !defined(WOLFSSL_SP_SMALL)
#if SP_WORD_SIZE < 64
        if (d == 3) {
            /* Fast implementation for divisor of 3. */
            _sp_div_3(a, r, rem);
        }
        else
#endif
        if (d == 10) {
            /* Fast implementation for divisor of 10 - sp_todecimal(). */
            _sp_div_10(a, r, rem);
        }
        else
#endif
        if (d <= SP_HALF_MAX) {
            /* For small divisors. */
            _sp_div_small(a, d, r, rem);
        }
        else
        {
            /* General case - word-by-word trial division. */
            _sp_div_d(a, d, r, rem);
        }
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Quotient keeps the sign of a (truncating division). */
        if (r != NULL) {
            r->sign = a->sign;
        }
#endif
    }

    return err;
}
  6526. #endif /* WOLFSSL_SP_DIV_D */
  6527. #ifdef WOLFSSL_SP_MOD_D
/* Calculate a modulo the digit d into r: r = a mod d
 *
 * @param [in]  a  SP integer to reduce.
 * @param [in]  d  Digit that is the modulus.
 * @param [out] r  Digit that is the result.
 */
static void _sp_mod_d(const sp_int* a, const sp_int_digit d, sp_int_digit* r)
{
    int i;
#ifndef SQR_MUL_ASM
    sp_int_word w = 0;
#else
    sp_int_digit h = 0;
#endif

    /* Divide starting at most significant word down to least - only the
     * running remainder is kept, the quotient digits are discarded. */
    for (i = (int)(a->used - 1); i >= 0; i--) {
#ifndef SQR_MUL_ASM
        /* Combine remainder from last operation with this word and divide. */
        sp_int_digit t = sp_div_word((sp_int_digit)w, a->dp[i], d);
        /* Combine remainder from last operation with this word. */
        w = (w << SP_WORD_SIZE) | a->dp[i];
        /* Subtract to get modulo result. */
        w -= (sp_int_word)t * d;
#else
        /* Combine remainder from last operation with this word and divide. */
        sp_int_digit t = sp_div_word(h, a->dp[i], d);
        /* Subtract to get modulo result. */
        h = a->dp[i] - t * d;
#endif
    }

    /* Return remainder. */
#ifndef SQR_MUL_ASM
    *r = (sp_int_digit)w;
#else
    *r = h;
#endif
}
  6565. /* Calculate a modulo the digit d into r: r = a mod d
  6566. *
  6567. * @param [in] a SP integer to reduce.
  6568. * @param [in] d Digit to that is the modulus.
  6569. * @param [out] r Digit that is the result.
  6570. *
  6571. * @return MP_OKAY on success.
  6572. * @return MP_VAL when a is NULL or d is 0.
  6573. */
  6574. #if !defined(WOLFSSL_SP_MATH_ALL) && (!defined(HAVE_ECC) || \
  6575. !defined(HAVE_COMP_KEY)) && !defined(OPENSSL_EXTRA)
  6576. static
  6577. #endif /* !WOLFSSL_SP_MATH_ALL && (!HAVE_ECC || !HAVE_COMP_KEY) */
  6578. int sp_mod_d(const sp_int* a, sp_int_digit d, sp_int_digit* r)
  6579. {
  6580. int err = MP_OKAY;
  6581. /* Validate parameters. */
  6582. if ((a == NULL) || (r == NULL) || (d == 0)) {
  6583. err = MP_VAL;
  6584. }
  6585. #if 0
  6586. sp_print(a, "a");
  6587. sp_print_digit(d, "m");
  6588. #endif
  6589. if (err == MP_OKAY) {
  6590. /* Check whether d is a power of 2. */
  6591. if ((d & (d - 1)) == 0) {
  6592. if (a->used == 0) {
  6593. *r = 0;
  6594. }
  6595. else {
  6596. *r = a->dp[0] & (d - 1);
  6597. }
  6598. }
  6599. #if !defined(WOLFSSL_SP_SMALL)
  6600. #if SP_WORD_SIZE < 64
  6601. else if (d == 3) {
  6602. /* Fast implementation for divisor of 3. */
  6603. _sp_div_3(a, NULL, r);
  6604. }
  6605. #endif
  6606. else if (d == 10) {
  6607. /* Fast implementation for divisor of 10. */
  6608. _sp_div_10(a, NULL, r);
  6609. }
  6610. #endif
  6611. else if (d <= SP_HALF_MAX) {
  6612. /* For small divisors. */
  6613. _sp_div_small(a, d, NULL, r);
  6614. }
  6615. else {
  6616. _sp_mod_d(a, d, r);
  6617. }
  6618. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6619. if (a->sign == MP_NEG) {
  6620. *r = d - *r;
  6621. }
  6622. #endif
  6623. }
  6624. #if 0
  6625. sp_print_digit(*r, "rmod");
  6626. #endif
  6627. return err;
  6628. }
  6629. #endif /* WOLFSSL_SP_MOD_D */
  6630. #if defined(HAVE_ECC) || !defined(NO_DSA) || defined(OPENSSL_EXTRA) || \
  6631. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  6632. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  6633. /* Divides a by 2 and stores in r: r = a >> 1
  6634. *
  6635. * @param [in] a SP integer to divide.
  6636. * @param [out] r SP integer to hold result.
  6637. */
  6638. static void _sp_div_2(const sp_int* a, sp_int* r)
  6639. {
  6640. int i;
  6641. /* Shift down each word by 1 and include bottom bit of next at top. */
  6642. for (i = 0; i < (int)a->used - 1; i++) {
  6643. r->dp[i] = (a->dp[i] >> 1) | (a->dp[i+1] << (SP_WORD_SIZE - 1));
  6644. }
  6645. /* Last word only needs to be shifted down. */
  6646. r->dp[i] = a->dp[i] >> 1;
  6647. /* Set used to be all words seen. */
  6648. r->used = (unsigned int)i + 1;
  6649. /* Remove leading zeros. */
  6650. sp_clamp(r);
  6651. #ifdef WOLFSSL_SP_INT_NEGATIVE
  6652. /* Same sign in result. */
  6653. r->sign = a->sign;
  6654. #endif
  6655. }
  6656. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
  6657. /* Divides a by 2 and stores in r: r = a >> 1
  6658. *
  6659. * @param [in] a SP integer to divide.
  6660. * @param [out] r SP integer to hold result.
  6661. *
  6662. * @return MP_OKAY on success.
  6663. * @return MP_VAL when a or r is NULL.
  6664. */
  6665. int sp_div_2(const sp_int* a, sp_int* r)
  6666. {
  6667. int err = MP_OKAY;
  6668. /* Only when a public API. */
  6669. if ((a == NULL) || (r == NULL)) {
  6670. err = MP_VAL;
  6671. }
  6672. /* Ensure maximal size is supported by result. */
  6673. if ((err == MP_OKAY) && (a->used > r->size)) {
  6674. err = MP_VAL;
  6675. }
  6676. if (err == MP_OKAY) {
  6677. _sp_div_2(a, r);
  6678. }
  6679. return err;
  6680. }
  6681. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6682. #endif /* HAVE_ECC || !NO_DSA || OPENSSL_EXTRA ||
  6683. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6684. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Divides a by 2 mod m and stores in r: r = (a / 2) mod m
 *
 * r = a / 2 (mod m) - constant time (a < m and positive)
 *
 * NOTE(review): evenness of the conditional sum a + m relies on m being odd
 * (e.g. an odd prime modulus) - confirm callers guarantee this.
 *
 * @param [in]  a  SP integer to divide.
 * @param [in]  m  SP integer that is modulus.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a, m or r is NULL.
 */
int sp_div_2_mod_ct(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Check result has enough space for a + m. */
    if ((err == MP_OKAY) && (m->used + 1 > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#ifndef SQR_MUL_ASM
        sp_int_word w = 0;
#else
        sp_int_digit l = 0;
        sp_int_digit h;
        sp_int_digit t;
#endif
        /* Mask to apply to modulus: all ones when a is odd, zero when even. */
        sp_int_digit mask = (sp_int_digit)0 - (a->dp[0] & 1);
        unsigned int i;

#if 0
        sp_print(a, "a");
        sp_print(m, "m");
#endif

        /* Add a to m, if a is odd, into r in constant time. */
        for (i = 0; i < m->used; i++) {
            /* Mask to apply to a - set when used value at index. */
            sp_int_digit mask_a = (sp_int_digit)0 - (i < a->used);
#ifndef SQR_MUL_ASM
            /* Conditionally add modulus. */
            w += m->dp[i] & mask;
            /* Conditionally add a. */
            w += a->dp[i] & mask_a;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit down. */
            w >>= DIGIT_BIT;
#else
            /* No high digit. */
            h = 0;
            /* Conditionally use modulus. */
            t = m->dp[i] & mask;
            /* Add with carry modulus. */
            SP_ASM_ADDC_REG(l, h, t);
            /* Conditionally use a. */
            t = a->dp[i] & mask_a;
            /* Add with carry a. */
            SP_ASM_ADDC_REG(l, h, t);
            /* Store low digit in result. */
            r->dp[i] = l;
            /* Move high digit down. */
            l = h;
#endif
        }
        /* Store carry. */
#ifndef SQR_MUL_ASM
        r->dp[i] = (sp_int_digit)w;
#else
        r->dp[i] = l;
#endif
        /* Used includes carry word - set or not. */
        r->used = i + 1;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif
        /* Divide conditional sum by 2. */
        _sp_div_2(r, r);

#if 0
        sp_print(r, "rd2");
#endif
    }

    return err;
}
  6771. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  6772. /************************
  6773. * Add/Subtract Functions
  6774. ************************/
  6775. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
/* Add offset b to a into r: r = a + (b << (o * SP_WORD_SIZEOF))
 *
 * The carry out of the top column is written into one extra result digit, so
 * r must have room for one digit more than the larger operand (callers such
 * as sp_add() check this).
 *
 * @param [in]  a  SP integer to add to.
 * @param [in]  b  SP integer to add.
 * @param [out] r  SP integer to store result in.
 * @param [in]  o  Number of digits to offset b. Only honoured when
 *                 SP_MATH_NEED_ADD_OFF is defined; otherwise ignored.
 */
static void _sp_add_off(const sp_int* a, const sp_int* b, sp_int* r, int o)
{
    unsigned int i = 0;
#ifndef SQR_MUL_ASM
    /* Double-width accumulator: low half is the result digit, high half is
     * the carry into the next column. */
    sp_int_word t = 0;
#else
    /* Low/high accumulator digits and a temporary for the ASM carry macros. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit t = 0;
#endif

#ifdef SP_MATH_NEED_ADD_OFF
    unsigned int j;

    /* Copy a into result up to offset.
     * NOTE(review): i (unsigned) is compared with o (int) - o is assumed
     * non-negative by all callers; confirm if adding new call sites. */
    for (; (i < o) && (i < a->used); i++) {
        r->dp[i] = a->dp[i];
    }
    /* Set result to 0 for digits beyond those in a. */
    for (; i < o; i++) {
        r->dp[i] = 0;
    }
    /* Add each digit from a and b where both have values. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        /* Accumulate both digits on top of the incoming carry. */
        t += a->dp[i];
        t += b->dp[j];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move carry down ready for the next column. */
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Carry becomes the low digit of the next column. */
        l = h;
        h = 0;
#endif
    }
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* a is out of digits. Add carry and remaining b digits. */
    for (; j < b->used; i++, j++) {
#ifndef SQR_MUL_ASM
        t += b->dp[j];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = b->dp[j];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
#else
    /* Offset not required by any enabled algorithm. */
    (void)o;

    /* Add each digit from a and b where both have values. */
    for (; (i < a->used) && (i < b->used); i++) {
#ifndef SQR_MUL_ASM
        /* Accumulate both digits on top of the incoming carry. */
        t += a->dp[i];
        t += b->dp[i];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move carry down ready for the next column. */
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        t = b->dp[i];
        SP_ASM_ADDC(l, h, t);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Carry becomes the low digit of the next column. */
        l = h;
        h = 0;
#endif
    }
    /* Either a and/or b are out of digits. Add carry and remaining a digits. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        t += a->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = a->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
    /* a is out of digits. Add carry and remaining b digits. */
    for (; i < b->used; i++) {
#ifndef SQR_MUL_ASM
        t += b->dp[i];
        r->dp[i] = (sp_int_digit)t;
        t >>= SP_WORD_SIZE;
#else
        t = b->dp[i];
        SP_ASM_ADDC(l, h, t);
        r->dp[i] = l;
        l = h;
        h = 0;
#endif
    }
#endif

    /* Set used based on last digit put in. */
    r->used = i;
    /* Put in carry. */
#ifndef SQR_MUL_ASM
    r->dp[i] = (sp_int_digit)t;
    r->used += (t != 0);
#else
    r->dp[i] = l;
    r->used += (l != 0);
#endif
    /* Remove leading zeros. */
    sp_clamp(r);
}
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_SP_INVMOD */
  6910. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_SP_INT_NEGATIVE) || \
  6911. !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  6912. !defined(WOLFSSL_RSA_VERIFY_ONLY))
/* Sub offset b from a into r: r = a - (b << (o * SP_WORD_SIZEOF))
 * a must be greater than b (shifted) - any final borrow is discarded.
 *
 * When using offset, r == a is faster.
 *
 * @param [in]  a  SP integer to subtract from.
 * @param [in]  b  SP integer to subtract.
 * @param [out] r  SP integer to store result in.
 * @param [in]  o  Number of digits to offset b.
 */
static void _sp_sub_off(const sp_int* a, const sp_int* b, sp_int* r,
    unsigned int o)
{
    unsigned int i = 0;
    unsigned int j;
#ifndef SQR_MUL_ASM
    /* Signed double-width accumulator: a negative value carries the borrow
     * (relies on arithmetic right shift of the signed type). */
    sp_int_sword t = 0;
#else
    sp_int_digit l = 0;
    sp_int_digit h = 0;
#endif

    /* Need to copy digits up to offset into result. */
    if (r != a) {
        for (; (i < o) && (i < a->used); i++) {
            r->dp[i] = a->dp[i];
        }
    }
    else {
        /* In place: the low digits are already correct - just skip them. */
        i = o;
    }
    /* Index to add at is the offset now. */
    for (j = 0; (i < a->used) && (j < b->used); i++, j++) {
#ifndef SQR_MUL_ASM
        /* Add a into and subtract b from current value. */
        t += a->dp[i];
        t -= b->dp[j];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move high digit down; sign extension preserves the borrow. */
        t >>= SP_WORD_SIZE;
#else
        /* Add a into and subtract b from current value. */
        SP_ASM_ADDC(l, h, a->dp[i]);
        SP_ASM_SUBB(l, h, b->dp[j]);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
        /* High digit is 0 when positive or -1 on negative. */
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
#endif
    }
    /* b is out of digits - propagate borrow through remaining a digits. */
    for (; i < a->used; i++) {
#ifndef SQR_MUL_ASM
        /* Add a into current value. */
        t += a->dp[i];
        /* Store low digit in result. */
        r->dp[i] = (sp_int_digit)t;
        /* Move high digit down. */
        t >>= SP_WORD_SIZE;
#else
        /* Add a into current value. */
        SP_ASM_ADDC(l, h, a->dp[i]);
        /* Store low digit in result. */
        r->dp[i] = l;
        /* Move high digit down. */
        l = h;
        /* High digit is 0 when positive or -1 on negative. */
        h = (sp_int_digit)0 - (h >> (SP_WORD_SIZE - 1));
#endif
    }
    /* Set used based on last digit put in. */
    r->used = i;
    /* Remove leading zeros. */
    sp_clamp(r);
}
  6989. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_SP_INT_NEGATIVE || !NO_DH ||
  6990. * HAVE_ECC || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  6991. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_SP_INVMOD)
  6992. /* Add b to a into r: r = a + b
  6993. *
  6994. * @param [in] a SP integer to add to.
  6995. * @param [in] b SP integer to add.
  6996. * @param [out] r SP integer to store result in.
  6997. *
  6998. * @return MP_OKAY on success.
  6999. * @return MP_VAL when a, b, or r is NULL.
  7000. */
  7001. int sp_add(const sp_int* a, const sp_int* b, sp_int* r)
  7002. {
  7003. int err = MP_OKAY;
  7004. /* Validate parameters. */
  7005. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  7006. err = MP_VAL;
  7007. }
  7008. /* Check that r as big as a and b plus one word. */
  7009. if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
  7010. err = MP_VAL;
  7011. }
  7012. if (err == MP_OKAY) {
  7013. #ifndef WOLFSSL_SP_INT_NEGATIVE
  7014. /* Add two positive numbers. */
  7015. _sp_add_off(a, b, r, 0);
  7016. #else
  7017. /* Same sign then add absolute values and use sign. */
  7018. if (a->sign == b->sign) {
  7019. _sp_add_off(a, b, r, 0);
  7020. r->sign = a->sign;
  7021. }
  7022. /* Different sign and abs(a) >= abs(b). */
  7023. else if (_sp_cmp_abs(a, b) != MP_LT) {
  7024. /* Subtract absolute values and use sign of a unless result 0. */
  7025. _sp_sub_off(a, b, r, 0);
  7026. if (sp_iszero(r)) {
  7027. r->sign = MP_ZPOS;
  7028. }
  7029. else {
  7030. r->sign = a->sign;
  7031. }
  7032. }
  7033. /* Different sign and abs(a) < abs(b). */
  7034. else {
  7035. /* Reverse subtract absolute values and use sign of b. */
  7036. _sp_sub_off(b, a, r, 0);
  7037. r->sign = b->sign;
  7038. }
  7039. #endif
  7040. }
  7041. return err;
  7042. }
#endif /* !WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_SP_INVMOD */
  7044. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7045. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  7046. /* Subtract b from a into r: r = a - b
  7047. *
  7048. * a must be greater than b unless WOLFSSL_SP_INT_NEGATIVE is defined.
  7049. *
  7050. * @param [in] a SP integer to subtract from.
  7051. * @param [in] b SP integer to subtract.
  7052. * @param [out] r SP integer to store result in.
  7053. *
  7054. * @return MP_OKAY on success.
  7055. * @return MP_VAL when a, b, or r is NULL.
  7056. */
  7057. int sp_sub(const sp_int* a, const sp_int* b, sp_int* r)
  7058. {
  7059. int err = MP_OKAY;
  7060. /* Validate parameters. */
  7061. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  7062. err = MP_VAL;
  7063. }
  7064. /* Check that r as big as a and b plus one word. */
  7065. if ((err == MP_OKAY) && ((a->used >= r->size) || (b->used >= r->size))) {
  7066. err = MP_VAL;
  7067. }
  7068. if (err == MP_OKAY) {
  7069. #ifndef WOLFSSL_SP_INT_NEGATIVE
  7070. /* Subtract positive numbers b from a. */
  7071. _sp_sub_off(a, b, r, 0);
  7072. #else
  7073. /* Different sign. */
  7074. if (a->sign != b->sign) {
  7075. /* Add absolute values and use sign of a. */
  7076. _sp_add_off(a, b, r, 0);
  7077. r->sign = a->sign;
  7078. }
  7079. /* Same sign and abs(a) >= abs(b). */
  7080. else if (_sp_cmp_abs(a, b) != MP_LT) {
  7081. /* Subtract absolute values and use sign of a unless result 0. */
  7082. _sp_sub_off(a, b, r, 0);
  7083. if (sp_iszero(r)) {
  7084. r->sign = MP_ZPOS;
  7085. }
  7086. else {
  7087. r->sign = a->sign;
  7088. }
  7089. }
  7090. /* Same sign and abs(a) < abs(b). */
  7091. else {
  7092. /* Reverse subtract absolute values and use opposite sign of a */
  7093. _sp_sub_off(b, a, r, 0);
  7094. r->sign = 1 - a->sign;
  7095. }
  7096. #endif
  7097. }
  7098. return err;
  7099. }
  7100. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7101. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY)*/
  7102. /****************************
  7103. * Add/Subtract mod functions
  7104. ****************************/
  7105. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  7106. (!defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_CUSTOM_CURVES)) || \
  7107. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE)
  7108. /* Add two value and reduce: r = (a + b) % m
  7109. *
  7110. * @param [in] a SP integer to add.
  7111. * @param [in] b SP integer to add with.
  7112. * @param [in] m SP integer that is the modulus.
  7113. * @param [out] r SP integer to hold result.
  7114. *
  7115. * @return MP_OKAY on success.
  7116. * @return MP_MEM when dynamic memory allocation fails.
  7117. */
  7118. static int _sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m,
  7119. sp_int* r)
  7120. {
  7121. int err = MP_OKAY;
  7122. /* Calculate used based on digits used in a and b. */
  7123. unsigned int used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
  7124. DECL_SP_INT(t, used);
  7125. /* Allocate a temporary SP int to hold sum. */
  7126. ALLOC_SP_INT_SIZE(t, used, err, NULL);
  7127. if (err == MP_OKAY) {
  7128. /* Do sum. */
  7129. err = sp_add(a, b, t);
  7130. }
  7131. if (err == MP_OKAY) {
  7132. /* Mod result. */
  7133. err = sp_mod(t, m, r);
  7134. }
  7135. FREE_SP_INT(t, NULL);
  7136. return err;
  7137. }
  7138. /* Add two value and reduce: r = (a + b) % m
  7139. *
  7140. * @param [in] a SP integer to add.
  7141. * @param [in] b SP integer to add with.
  7142. * @param [in] m SP integer that is the modulus.
  7143. * @param [out] r SP integer to hold result.
  7144. *
  7145. * @return MP_OKAY on success.
  7146. * @return MP_VAL when a, b, m or r is NULL.
  7147. * @return MP_MEM when dynamic memory allocation fails.
  7148. */
  7149. int sp_addmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  7150. {
  7151. int err = MP_OKAY;
  7152. /* Validate parameters. */
  7153. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  7154. err = MP_VAL;
  7155. }
  7156. /* Ensure a and b aren't too big a number to operate on. */
  7157. else if (a->used >= SP_INT_DIGITS) {
  7158. err = MP_VAL;
  7159. }
  7160. else if (b->used >= SP_INT_DIGITS) {
  7161. err = MP_VAL;
  7162. }
  7163. #if 0
  7164. if (err == MP_OKAY) {
  7165. sp_print(a, "a");
  7166. sp_print(b, "b");
  7167. sp_print(m, "m");
  7168. }
  7169. #endif
  7170. if (err == MP_OKAY) {
  7171. /* Do add and modular reduction. */
  7172. err = _sp_addmod(a, b, m, r);
  7173. }
  7174. #if 0
  7175. if (err == MP_OKAY) {
  7176. sp_print(r, "rma");
  7177. }
  7178. #endif
  7179. return err;
  7180. }
  7181. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_CUSTOM_CURVES) ||
  7182. * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE */
  7183. #if defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  7184. defined(HAVE_ECC))
/* Sub b from a and reduce: r = (a - b) % m
 * Result is always positive.
 *
 * @param [in]  a  SP integer to subtract from
 * @param [in]  b  SP integer to subtract.
 * @param [in]  m  SP integer that is the modulus.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_submod(const sp_int* a, const sp_int* b, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* No negative representation: reduce both operands below m first, then
     * add m to a when needed so the subtraction cannot go negative. */
    /* Temporary size: one digit more than the largest relevant operand. */
    unsigned int used = ((a->used >= m->used) ?
        ((a->used >= b->used) ? (a->used + 1) : (b->used + 1)) :
        ((b->used >= m->used)) ? (b->used + 1) : (m->used + 1));
    DECL_SP_INT_ARRAY(t, used, 2);

    ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
    if (err == MP_OKAY) {
        /* Reduce a to less than m.
         * Note: the const parameter pointer is redirected at the local
         * temporary - the caller's object is never modified. */
        if (_sp_cmp(a, m) != MP_LT) {
            err = sp_mod(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        /* Reduce b to less than m. */
        if (_sp_cmp(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            b = t[1];
        }
    }
    if (err == MP_OKAY) {
        /* Add m to a if a smaller than b. */
        if (_sp_cmp(a, b) == MP_LT) {
            err = sp_add(a, m, t[0]);
            a = t[0];
        }
    }
    if (err == MP_OKAY) {
        /* Subtract b from a - now guaranteed non-negative. */
        err = sp_sub(a, b, r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
#else /* WOLFSSL_SP_INT_NEGATIVE */
    /* Negative values supported: subtract first, reduce afterwards. */
    unsigned int used = ((a->used >= b->used) ? a->used + 1 : b->used + 1);
    DECL_SP_INT(t, used);

    ALLOC_SP_INT_SIZE(t, used, err, NULL);
    /* Subtract b from a into temporary - may be negative. */
    if (err == MP_OKAY) {
        err = sp_sub(a, b, t);
    }
    if (err == MP_OKAY) {
        /* Reduce result mod m into result. */
        err = sp_mod(t, m, r);
    }
    FREE_SP_INT(t, NULL);
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    return err;
}
  7248. /* Sub b from a and reduce: r = (a - b) % m
  7249. * Result is always positive.
  7250. *
  7251. * @param [in] a SP integer to subtract from
  7252. * @param [in] b SP integer to subtract.
  7253. * @param [in] m SP integer that is the modulus.
  7254. * @param [out] r SP integer to hold result.
  7255. *
  7256. * @return MP_OKAY on success.
  7257. * @return MP_VAL when a, b, m or r is NULL.
  7258. * @return MP_MEM when dynamic memory allocation fails.
  7259. */
  7260. int sp_submod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  7261. {
  7262. int err = MP_OKAY;
  7263. /* Validate parameters. */
  7264. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  7265. err = MP_VAL;
  7266. }
  7267. /* Ensure a, b and m aren't too big a number to operate on. */
  7268. else if (a->used >= SP_INT_DIGITS) {
  7269. err = MP_VAL;
  7270. }
  7271. else if (b->used >= SP_INT_DIGITS) {
  7272. err = MP_VAL;
  7273. }
  7274. else if (m->used >= SP_INT_DIGITS) {
  7275. err = MP_VAL;
  7276. }
  7277. #if 0
  7278. if (err == MP_OKAY) {
  7279. sp_print(a, "a");
  7280. sp_print(b, "b");
  7281. sp_print(m, "m");
  7282. }
  7283. #endif
  7284. if (err == MP_OKAY) {
  7285. /* Do submod. */
  7286. err = _sp_submod(a, b, m, r);
  7287. }
  7288. #if 0
  7289. if (err == MP_OKAY) {
  7290. sp_print(r, "rms");
  7291. }
  7292. #endif
  7293. return err;
  7294. }
  7295. #endif /* WOLFSSL_SP_MATH_ALL */
  7296. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Add two values and reduce: r = (a + b) % m
 *
 * r = a + b (mod m) - constant time (a < m and b < m, a, b and m are positive)
 *
 * Assumes a, b, m and r are not NULL.
 * m and r must not be the same pointer.
 *
 * Constant-time: the sequence of memory accesses and arithmetic does not
 * depend on the values of a and b. Do not "simplify" the masks or the
 * conditional subtraction into data-dependent branches.
 *
 * @param [in]  a  SP integer to add.
 * @param [in]  b  SP integer to add with.
 * @param [in]  m  SP integer that is the modulus.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when r is too small or r == m.
 */
int sp_addmod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifndef SQR_MUL_ASM
    sp_int_sword w;  /* Running sum a + b: low half digit, high half carry. */
    sp_int_sword s;  /* Running value of (a + b) - m: sign tracks borrow. */
#else
    sp_int_digit wl; /* Low digit of running sum. */
    sp_int_digit wh; /* High (carry) digit of running sum. */
    sp_int_digit sl; /* Low digit of (a + b) - m. */
    sp_int_digit sh; /* High (borrow) digit of (a + b) - m. */
    sp_int_digit t;
#endif
    sp_int_digit mask;
    /* All-ones while i < used; adding (i == used) rolls the mask over to
     * zero without branching, so uninitialized digits are never consumed. */
    sp_int_digit mask_a = (sp_int_digit)-1;
    sp_int_digit mask_b = (sp_int_digit)-1;
    unsigned int i;

    /* Check result is as big as modulus. */
    if (m->used > r->size) {
        err = MP_VAL;
    }
    /* Validate parameters. */
    if ((err == MP_OKAY) && (r == m)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
#endif

        /* Add a to b into r. Do the subtract of modulus but don't store result.
         * When subtract result is negative, the overflow will be negative.
         * Only need to subtract mod when result is positive - overflow is
         * positive.
         */
#ifndef SQR_MUL_ASM
        w = 0;
        s = 0;
#else
        wl = 0;
        sl = 0;
        sh = 0;
#endif
        /* Constant time - add modulus digits worth from a and b. */
        for (i = 0; i < m->used; i++) {
            /* Values past 'used' are not initialized. */
            mask_a += (i == a->used);
            mask_b += (i == b->used);

#ifndef SQR_MUL_ASM
            /* Add next digits from a and b to current value. */
            w += a->dp[i] & mask_a;
            w += b->dp[i] & mask_b;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Add result to reducing value. */
            s += (sp_int_digit)w;
            /* Subtract next digit of modulus. */
            s -= m->dp[i];
            /* Move high digit of reduced result down. */
            s >>= DIGIT_BIT;
            /* Move high digit of sum result down. */
            w >>= DIGIT_BIT;
#else
            wh = 0;
            /* Add next digits from a and b to current value. */
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC_REG(wl, wh, t);
            t = b->dp[i] & mask_b;
            SP_ASM_ADDC_REG(wl, wh, t);
            /* Store low digit in result. */
            r->dp[i] = wl;
            /* Add result to reducing value. */
            SP_ASM_ADDC_REG(sl, sh, wl);
            /* Subtract next digit of modulus. */
            SP_ASM_SUBB(sl, sh, m->dp[i]);
            /* Move high digit of reduced result down. */
            sl = sh;
            /* High digit is 0 when positive or -1 on negative. */
            sh = (sp_int_digit)0 - (sh >> (SP_WORD_SIZE-1));
            /* Move high digit of sum result down. */
            wl = wh;
#endif
        }
#ifndef SQR_MUL_ASM
        /* Add carry into reduced result. */
        s += (sp_int_digit)w;
        /* s will be positive when subtracting modulus is needed.
         * mask is all-ones in that case, zero otherwise. */
        mask = (sp_int_digit)0 - (s >= 0);
#else
        /* Add carry into reduced result. */
        SP_ASM_ADDC_REG(sl, sh, wl);
        /* s will be positive when subtracting modulus is needed.
         * mask is all-ones in that case, zero otherwise. */
        mask = (sh >> (SP_WORD_SIZE-1)) - 1;
#endif

        /* Constant time, conditionally, subtract modulus from sum. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        wl = 0;
        wh = 0;
#endif
        for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
            /* Add result to current value and conditionally subtract modulus.
             */
            w += r->dp[i];
            w -= m->dp[i] & mask;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit of sum result down. */
            w >>= DIGIT_BIT;
#else
            /* Add result to current value and conditionally subtract modulus.
             */
            SP_ASM_ADDC(wl, wh, r->dp[i]);
            t = m->dp[i] & mask;
            SP_ASM_SUBB_REG(wl, wh, t);
            /* Store low digit in result. */
            r->dp[i] = wl;
            /* Move high digit of sum result down. */
            wl = wh;
            /* High digit is 0 when positive or -1 on negative. */
            wh = (sp_int_digit)0 - (wl >> (SP_WORD_SIZE-1));
#endif
        }
        /* Result will always have digits equal to or less than those in
         * modulus. */
        r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Remove leading zeros. */
        sp_clamp(r);

#if 0
        sp_print(r, "rma");
#endif
    }

    return err;
}
  7451. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
  7452. #if defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC)
/* Sub b from a and reduce: r = (a - b) % m
 * Result is always positive.
 *
 * r = a - b (mod m) - constant time (a < m and b < m, a, b and m are positive)
 *
 * Assumes a, b, m and r are not NULL.
 * m and r must not be the same pointer.
 *
 * Constant-time: the sequence of memory accesses and arithmetic does not
 * depend on the values of a and b. Do not "simplify" the masks or the
 * conditional addition into data-dependent branches.
 *
 * @param [in]  a  SP integer to subtract from
 * @param [in]  b  SP integer to subtract.
 * @param [in]  m  SP integer that is the modulus.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when r is too small or r == m.
 */
int sp_submod_ct(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
#ifndef SQR_MUL_ASM
    /* Signed accumulator: sign carries the borrow between digits. */
    sp_int_sword w;
#else
    sp_int_digit l;
    sp_int_digit h;
    sp_int_digit t;
#endif
    sp_int_digit mask;
    /* All-ones while i < used; adding (i == used) rolls the mask over to
     * zero without branching, so uninitialized digits are never consumed. */
    sp_int_digit mask_a = (sp_int_digit)-1;
    sp_int_digit mask_b = (sp_int_digit)-1;
    unsigned int i;

    /* Check result can hold as many digits as modulus. */
    if (m->used > r->size) {
        err = MP_VAL;
    }
    /* Validate parameters. */
    if ((err == MP_OKAY) && (r == m)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
#if 0
        sp_print(a, "a");
        sp_print(b, "b");
        sp_print(m, "m");
#endif

        /* In constant time, subtract b from a putting result in r. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        l = 0;
        h = 0;
#endif
        for (i = 0; i < m->used; i++) {
            /* Values past 'used' are not initialized. */
            mask_a += (i == a->used);
            mask_b += (i == b->used);

#ifndef SQR_MUL_ASM
            /* Add a to and subtract b from current value. */
            w += a->dp[i] & mask_a;
            w -= b->dp[i] & mask_b;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit down; sign extension preserves the borrow. */
            w >>= DIGIT_BIT;
#else
            /* Add a and subtract b from current value. */
            t = a->dp[i] & mask_a;
            SP_ASM_ADDC_REG(l, h, t);
            t = b->dp[i] & mask_b;
            SP_ASM_SUBB_REG(l, h, t);
            /* Store low digit in result. */
            r->dp[i] = l;
            /* Move high digit down. */
            l = h;
            /* High digit is 0 when positive or -1 on negative. */
            h = (sp_int_digit)0 - (l >> (SP_WORD_SIZE - 1));
#endif
        }
        /* When w is negative then we need to add modulus to make result
         * positive. mask is all-ones in that case, zero otherwise. */
#ifndef SQR_MUL_ASM
        mask = (sp_int_digit)0 - (w < 0);
#else
        mask = h;
#endif

        /* Constant time, conditionally, add modulus to difference. */
#ifndef SQR_MUL_ASM
        w = 0;
#else
        l = 0;
#endif
        for (i = 0; i < m->used; i++) {
#ifndef SQR_MUL_ASM
            /* Add result and conditionally modulus to current value. */
            w += r->dp[i];
            w += m->dp[i] & mask;
            /* Store low digit in result. */
            r->dp[i] = (sp_int_digit)w;
            /* Move high digit down. */
            w >>= DIGIT_BIT;
#else
            h = 0;
            /* Add result and conditionally modulus to current value. */
            SP_ASM_ADDC(l, h, r->dp[i]);
            t = m->dp[i] & mask;
            SP_ASM_ADDC_REG(l, h, t);
            /* Store low digit in result. */
            r->dp[i] = l;
            /* Move high digit down. */
            l = h;
#endif
        }
        /* Result will always have digits equal to or less than those in
         * modulus. */
        r->used = i;
#ifdef WOLFSSL_SP_INT_NEGATIVE
        r->sign = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Remove leading zeros. */
        sp_clamp(r);

#if 0
        sp_print(r, "rms");
#endif
    }

    return err;
}
  7577. #endif /* WOLFSSL_SP_MATH_ALL && HAVE_ECC */
/********************
 * Shifting functions
 ********************/
  7581. #if !defined(NO_DH) || defined(HAVE_ECC) || (!defined(NO_RSA) && \
  7582. defined(WC_RSA_BLINDING) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  7583. /* Left shift the multi-precision number by a number of digits.
  7584. *
  7585. * @param [in,out] a SP integer to shift.
  7586. * @param [in] s Number of digits to shift.
  7587. *
  7588. * @return MP_OKAY on success.
  7589. * @return MP_VAL when a is NULL, s is negative or the result is too big.
  7590. */
  7591. int sp_lshd(sp_int* a, int s)
  7592. {
  7593. int err = MP_OKAY;
  7594. /* Validate parameters. */
  7595. if ((a == NULL) || (s < 0)) {
  7596. err = MP_VAL;
  7597. }
  7598. /* Ensure number has enough digits for operation. */
  7599. if ((err == MP_OKAY) && (a->used + (unsigned int)s > a->size)) {
  7600. err = MP_VAL;
  7601. }
  7602. if (err == MP_OKAY) {
  7603. /* Move up digits. */
  7604. XMEMMOVE(a->dp + s, a->dp, a->used * SP_WORD_SIZEOF);
  7605. /* Back fill with zeros. */
  7606. XMEMSET(a->dp, 0, (size_t)s * SP_WORD_SIZEOF);
  7607. /* Update used. */
  7608. a->used += (unsigned int)s;
  7609. /* Remove leading zeros. */
  7610. sp_clamp(a);
  7611. }
  7612. return err;
  7613. }
  7614. #endif
  7615. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7616. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7617. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Left shift the multi-precision number by n bits.
 * Bits may be larger than the word size.
 *
 * Used by sp_mul_2d() and other internal functions.
 *
 * @param [in,out] a  SP integer to shift.
 * @param [in]     n  Number of bits to shift left. Assumed non-negative.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when the result is too big.
 */
static int sp_lshb(sp_int* a, int n)
{
    int err = MP_OKAY;

    if (a->used != 0) {
        /* Calculate number of whole digits to shift. */
        unsigned int s = (unsigned int)n >> SP_WORD_SHIFT;

        /* Ensure number has enough digits for result.
         * '>=' (not '>') reserves one spare digit for the bits shifted out
         * of the current top digit (v below). */
        if (a->used + s >= a->size) {
            err = MP_VAL;
        }
        if (err == MP_OKAY) {
            /* Get count of bits to move within a digit. */
            n &= SP_WORD_MASK;
            /* Check whether this is a complicated case. */
            if (n != 0) {
                unsigned int i;
                /* Shift up starting at most significant digit. */
                /* Get new most significant digit: bits shifted out of the
                 * current top digit. */
                sp_int_digit v = a->dp[a->used - 1] >> (SP_WORD_SIZE - n);

                /* Shift up each digit, pulling low bits from the digit
                 * below. */
                for (i = a->used - 1; i >= 1; i--) {
                    a->dp[i + s] = (a->dp[i] << n) |
                                   (a->dp[i - 1] >> (SP_WORD_SIZE - n));
                }
                /* Shift up least significant digit. */
                a->dp[s] = a->dp[0] << n;
                /* Add new high digit unless zero. */
                if (v != 0) {
                    a->dp[a->used + s] = v;
                    a->used++;
                }
            }
            /* Only whole digits to move and at least one of them. */
            else if (s > 0) {
                /* Move up digits - regions overlap so memmove semantics. */
                XMEMMOVE(a->dp + s, a->dp, a->used * SP_WORD_SIZEOF);
            }
            /* Update used digit count. */
            a->used += s;
            /* Back fill with zeros. */
            XMEMSET(a->dp, 0, SP_WORD_SIZEOF * s);
        }
    }

    return err;
}
  7674. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7675. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  7676. #ifdef WOLFSSL_SP_MATH_ALL
  7677. /* Shift a right by c digits: a = a >> (n * SP_WORD_SIZE)
  7678. *
  7679. * @param [in, out] a SP integer to shift.
  7680. * @param [in] c Number of digits to shift.
  7681. */
  7682. void sp_rshd(sp_int* a, int c)
  7683. {
  7684. /* Do shift if we have an SP int. */
  7685. if ((a != NULL) && (c > 0)) {
  7686. /* Make zero if shift removes all digits. */
  7687. if ((unsigned int)c >= a->used) {
  7688. _sp_zero(a);
  7689. }
  7690. else {
  7691. unsigned int i;
  7692. /* Update used digits count. */
  7693. a->used -= (unsigned int)c;
  7694. /* Move digits down. */
  7695. for (i = 0; i < a->used; i++, c++) {
  7696. a->dp[i] = a->dp[c];
  7697. }
  7698. }
  7699. }
  7700. }
  7701. #endif /* WOLFSSL_SP_MATH_ALL */
  7702. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7703. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  7704. defined(WOLFSSL_HAVE_SP_DH)
  7705. /* Shift a right by n bits into r: r = a >> n
  7706. *
  7707. * @param [in] a SP integer to shift.
  7708. * @param [in] n Number of bits to shift.
  7709. * @param [out] r SP integer to store result in.
  7710. */
  7711. int sp_rshb(const sp_int* a, int n, sp_int* r)
  7712. {
  7713. int err = MP_OKAY;
  7714. /* Number of digits to shift down. */
  7715. unsigned int i = (unsigned int)(n >> SP_WORD_SHIFT);
  7716. if ((a == NULL) || (n < 0)) {
  7717. err = MP_VAL;
  7718. }
  7719. /* Handle case where shifting out all digits. */
  7720. if ((err == MP_OKAY) && (i >= a->used)) {
  7721. _sp_zero(r);
  7722. }
  7723. /* Change callers when more error cases returned. */
  7724. else if ((err == MP_OKAY) && (a->used - i > r->size)) {
  7725. err = MP_VAL;
  7726. }
  7727. else if (err == MP_OKAY) {
  7728. unsigned int j;
  7729. /* Number of bits to shift in digits. */
  7730. n &= SP_WORD_SIZE - 1;
  7731. /* Handle simple case. */
  7732. if (n == 0) {
  7733. /* Set the count of used digits. */
  7734. r->used = a->used - i;
  7735. /* Move digits down. */
  7736. if (r == a) {
  7737. XMEMMOVE(r->dp, r->dp + i, SP_WORD_SIZEOF * r->used);
  7738. }
  7739. else {
  7740. XMEMCPY(r->dp, a->dp + i, SP_WORD_SIZEOF * r->used);
  7741. }
  7742. }
  7743. else {
  7744. /* Move the bits down starting at least significant digit. */
  7745. for (j = 0; i < a->used-1; i++, j++)
  7746. r->dp[j] = (a->dp[i] >> n) | (a->dp[i+1] << (SP_WORD_SIZE - n));
  7747. /* Most significant digit has no higher digit to pull from. */
  7748. r->dp[j] = a->dp[i] >> n;
  7749. /* Set the count of used digits. */
  7750. r->used = j + (r->dp[j] > 0);
  7751. }
  7752. #ifdef WOLFSSL_SP_INT_NEGATIVE
  7753. if (sp_iszero(r)) {
  7754. /* Set zero sign. */
  7755. r->sign = MP_ZPOS;
  7756. }
  7757. else {
  7758. /* Retain sign. */
  7759. r->sign = a->sign;
  7760. }
  7761. #endif
  7762. }
  7763. return err;
  7764. }
  7765. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC ||
  7766. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) || WOLFSSL_HAVE_SP_DH */
  7767. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  7768. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  7769. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
/* Handle division step where the dividend has a top the same size as divisor.
 *
 * When the top d->used digits of a are greater than or equal to d, one
 * multiple of d (aligned to the top of a) is subtracted from a and 1 is
 * added to the corresponding quotient digit. Caller ensures a->used >=
 * d->used and that r has at least a->used - d->used + 1 digits available.
 *
 * @param [in, out] a  SP integer dividend; top reduced on output.
 * @param [in]      d  SP integer divisor.
 * @param [in, out] r  SP integer quotient being accumulated.
 */
static void _sp_div_same_size(sp_int* a, const sp_int* d, sp_int* r)
{
    unsigned int i;

    /* Compare top digits of dividend with those of divisor up to last. */
    for (i = d->used - 1; i > 0; i--) {
        /* Break if top divisor digit is not equal to dividend digit. */
        if (a->dp[a->used - d->used + i] != d->dp[i]) {
            break;
        }
    }
    /* Check if top of dividend is greater than or equal to divisor. */
    if (a->dp[a->used - d->used + i] >= d->dp[i]) {
        /* Update quotient result. */
        r->dp[a->used - d->used] += 1;
        /* Get 'used' to restore - ensure zeros put into quotient. */
        i = a->used;
        /* Subtract d from top of a. */
        _sp_sub_off(a, d, a, a->used - d->used);
        /* Restore 'used' on remainder. */
        a->used = i;
    }
}
/* Divide a by d and return the quotient in r and the remainder in a.
 * r = a / d; a = a % d
 *
 * Note: a is constantly having multiples of d subtracted.
 * Divisor is expected to be normalized by the caller (see _sp_div which
 * shifts d so the top bit of the top digit is set).
 *
 * @param [in, out] a      SP integer to be divided and remainder on out.
 * @param [in]      d      SP integer to divide by.
 * @param [out]     r      SP integer that is the quotient.
 * @param [out]     trial  SP integer that is product in trial division.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when operation fails - only when compiling small code.
 */
static int _sp_div_impl(sp_int* a, const sp_int* d, sp_int* r, sp_int* trial)
{
    int err = MP_OKAY;
    unsigned int i;
#ifdef WOLFSSL_SP_SMALL
    int c;
#else
    unsigned int j;
    unsigned int o;
#ifndef SQR_MUL_ASM
    sp_int_sword sw;
#else
    sp_int_digit sl;
    sp_int_digit sh;
    sp_int_digit st;
#endif
#endif /* WOLFSSL_SP_SMALL */
    sp_int_digit t;
    sp_int_digit dt;

    /* Set result size to clear. */
    r->used = a->used - d->used + 1;
    /* Set all potentially used digits to zero. */
    for (i = 0; i < r->used; i++) {
        r->dp[i] = 0;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    r->sign = MP_ZPOS;
#endif
    /* Get the most significant digit (will have top bit set). */
    dt = d->dp[d->used-1];
    /* Handle when a >= d ^ (2 ^ (SP_WORD_SIZE * x)). */
    _sp_div_same_size(a, d, r);

    /* Keep subtracting multiples of d as long as the digit count of a is
     * greater than equal to d.
     */
    for (i = a->used - 1; i >= d->used; i--) {
        /* When top digits equal, estimate maximum multiplier.
         * Worst case, multiplier is actually SP_DIGIT_MAX - 1.
         * That is, for w (word size in bits) > 1, n > 1, let:
         *   a = 2^((n+1)*w-1), d = 2^(n*w-1) + 2^((n-1)*w) - 1, t = 2^w - 2
         * Then,
         *   d * t
         *   = (2^(n*w-1) + 2^((n-1)*w) - 1) * (2^w - 2)
         *   = 2^((n+1)*w-1) - 2^(n*w) + 2^(n*w) - 2^((n-1)*w+1) - 2^w + 2
         *   = 2^((n+1)*w-1) - 2^((n-1)*w+1) - 2^w + 2
         *   = a - 2^((n-1)*w+1) - 2^w + 2
         * d > 2^((n-1)*w+1) + 2^w - 2, when w > 1, n > 1
         */
        if (a->dp[i] == dt) {
            t = SP_DIGIT_MAX;
        }
        else {
            /* Calculate trial quotient by dividing top word of dividend by top
             * digit of divisor.
             * Some implementations segfault when quotient > SP_DIGIT_MAX.
             * Implementations in assembly, using builtins or using
             * digits only (WOLFSSL_SP_DIV_WORD_HALF).
             */
            t = sp_div_word(a->dp[i], a->dp[i-1], dt);
        }
#ifdef WOLFSSL_SP_SMALL
        do {
            /* Calculate trial value from trial quotient: trial = d * t. */
            err = _sp_mul_d(d, t, trial, i - d->used);
            if (err != MP_OKAY) {
                break;
            }
            /* Check if trial is bigger. */
            c = _sp_cmp_abs(trial, a);
            if (c == MP_GT) {
                /* Decrement trial quotient and try again. */
                t--;
            }
        }
        while (c == MP_GT);
        if (err != MP_OKAY) {
            break;
        }

        /* Subtract the trial and add quotient digit to result. */
        _sp_sub_off(a, trial, a, 0);
        r->dp[i - d->used] += t;
        /* Handle overflow of digit. */
        if (r->dp[i - d->used] < t) {
            r->dp[i + 1 - d->used]++;
        }
#else
        /* Index of lowest digit trial is subtracted from. */
        o = i - d->used;
        do {
#ifndef SQR_MUL_ASM
            sp_int_word tw = 0;
#else
            sp_int_digit tl = 0;
            sp_int_digit th = 0;
#endif
            /* Multiply divisor by trial quotient: trial = d * t. */
            for (j = 0; j < d->used; j++) {
#ifndef SQR_MUL_ASM
                tw += (sp_int_word)d->dp[j] * t;
                trial->dp[j] = (sp_int_digit)tw;
                tw >>= SP_WORD_SIZE;
#else
                SP_ASM_MUL_ADD_NO(tl, th, d->dp[j], t);
                trial->dp[j] = tl;
                tl = th;
                th = 0;
#endif
            }
            /* Store final carry digit of the multiply. */
#ifndef SQR_MUL_ASM
            trial->dp[j] = (sp_int_digit)tw;
#else
            trial->dp[j] = tl;
#endif

            /* Check trial quotient isn't larger than dividend - compare
             * digits from the top (carry digit) down to first mismatch. */
            for (j = d->used; j > 0; j--) {
                if (trial->dp[j] != a->dp[j + o]) {
                    break;
                }
            }
            /* Decrement trial quotient if larger and try again. */
            if (trial->dp[j] > a->dp[j + o]) {
                t--;
            }
        }
        while (trial->dp[j] > a->dp[j + o]);

#ifndef SQR_MUL_ASM
        sw = 0;
#else
        sl = 0;
        sh = 0;
#endif
        /* Subtract trial from a at offset o - don't need to update used. */
        for (j = 0; j <= d->used; j++) {
#ifndef SQR_MUL_ASM
            sw += a->dp[j + o];
            sw -= trial->dp[j];
            a->dp[j + o] = (sp_int_digit)sw;
            sw >>= SP_WORD_SIZE;
#else
            st = a->dp[j + o];
            SP_ASM_ADDC(sl, sh, st);
            st = trial->dp[j];
            SP_ASM_SUBB(sl, sh, st);
            a->dp[j + o] = sl;
            sl = sh;
            /* Sign extend the borrow into the next digit. */
            sh = (sp_int_digit)0 - (sl >> (SP_WORD_SIZE - 1));
#endif
        }
        /* Store the quotient digit. */
        r->dp[o] = t;
#endif /* WOLFSSL_SP_SMALL */
    }
    /* Update used - a has been reduced to fewer digits than d. */
    a->used = i + 1;
    if (a->used == d->used) {
        /* Finish div now that length of dividend is same as divisor. */
        _sp_div_same_size(a, d, r);
    }

    return err;
}
/* Divide a by d and return the quotient in r and the remainder in rem.
 * r = a / d; rem = a % d
 *
 * Handles trivial cases (a < d, a == d, same bit length) directly and
 * otherwise normalizes the divisor (shift so top bit set) before calling
 * _sp_div_impl.
 *
 * @param [in]  a     SP integer to be divided.
 * @param [in]  d     SP integer to divide by.
 * @param [out] r     SP integer that is the quotient. May be NULL.
 * @param [out] rem   SP integer that is the remainder. May be NULL.
 * @param [in]  used  Number of digits in temporaries to use.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem,
    unsigned int used)
{
    int err = MP_OKAY;
    int ret;
    int done = 0;
    int s = 0;
    sp_int* sa = NULL;
    sp_int* sd = NULL;
    sp_int* tr = NULL;
    sp_int* trial = NULL;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    unsigned int signA = MP_ZPOS;
    unsigned int signD = MP_ZPOS;
#endif /* WOLFSSL_SP_INT_NEGATIVE */
    /* Intermediates will always be less than or equal to dividend. */
    DECL_SP_INT_ARRAY(td, used, 4);

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Cache signs for results - a and d may be aliased with r or rem. */
    signA = a->sign;
    signD = d->sign;
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    /* Handle simple case of: dividend < divisor. */
    ret = _sp_cmp_abs(a, d);
    if (ret == MP_LT) {
        /* a = 0 * d + a */
        if ((rem != NULL) && (a != rem)) {
            _sp_copy(a, rem);
        }
        if (r != NULL) {
            _sp_set(r, 0);
        }
        done = 1;
    }
    /* Handle simple case of: dividend == divisor. */
    else if (ret == MP_EQ) {
        /* a = 1 * d + 0 */
        if (rem != NULL) {
            _sp_set(rem, 0);
        }
        if (r != NULL) {
            _sp_set(r, 1);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }
    else if (sp_count_bits(a) == sp_count_bits(d)) {
        /* a is greater than d but same bit length - quotient must be 1. */
        if (rem != NULL) {
            _sp_sub_off(a, d, rem, 0);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            rem->sign = signA;
        #endif
        }
        if (r != NULL) {
            _sp_set(r, 1);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = (signA == signD) ? MP_ZPOS : MP_NEG;
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
        done = 1;
    }

    /* Allocate temporary 'sp_int's and assign. */
    if ((!done) && (err == MP_OKAY)) {
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
        !defined(WOLFSSL_SP_NO_MALLOC)
        int cnt = 4;
        /* Reuse remainder sp_int where possible. */
        if ((rem != NULL) && (rem != d) && (rem->size > a->used)) {
            sa = rem;
            cnt--;
        }
        /* Reuse result sp_int where possible. */
        if ((r != NULL) && (r != d)) {
            tr = r;
            cnt--;
        }
        /* Macro always has code associated with it and checks err first. */
        ALLOC_SP_INT_ARRAY(td, used, cnt, err, NULL);
    #else
        ALLOC_SP_INT_ARRAY(td, used, 4, err, NULL);
    #endif
    }
    if ((!done) && (err == MP_OKAY)) {
    #if (defined(WOLFSSL_SMALL_STACK) || defined(SP_ALLOC)) && \
        !defined(WOLFSSL_SP_NO_MALLOC)
        int i = 2;

        /* Set to temporary when not reusing. */
        if (sa == NULL) {
            sa = td[i++];
            _sp_init_size(sa, used);
        }
        if (tr == NULL) {
            tr = td[i];
            _sp_init_size(tr, a->used - d->used + 2);
        }
    #else
        sa    = td[2];
        tr    = td[3];

        _sp_init_size(sa, used);
        _sp_init_size(tr, a->used - d->used + 2);
    #endif
        sd    = td[0];
        trial = td[1];

        /* Initialize sizes to minimal values. */
        _sp_init_size(sd, d->used + 1);
        _sp_init_size(trial, used);

        /* Move divisor to top of word. Adjust dividend as well. */
        s = sp_count_bits(d);
        s = SP_WORD_SIZE - (s & SP_WORD_MASK);
        _sp_copy(a, sa);
        /* Only shift if top bit of divisor not set. */
        if (s != SP_WORD_SIZE) {
            err = sp_lshb(sa, s);
            if (err == MP_OKAY) {
                _sp_copy(d, sd);
                d = sd;
                err = sp_lshb(sd, s);
            }
        }
    }
    if ((!done) && (err == MP_OKAY) && (d->used > 0)) {
        /* Do division: tr = sa / d, sa = sa % d. */
        err = _sp_div_impl(sa, d, tr, trial);
        /* Return the remainder if required. */
        if ((err == MP_OKAY) && (rem != NULL)) {
            /* Move result back down if moved up for divisor value. */
            if (s != SP_WORD_SIZE) {
                (void)sp_rshb(sa, s, sa);
            }
            _sp_copy(sa, rem);
            sp_clamp(rem);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            rem->sign = (rem->used == 0) ? MP_ZPOS : signA;
        #endif
        }
        /* Return the quotient if required. */
        if ((err == MP_OKAY) && (r != NULL)) {
            _sp_copy(tr, r);
            sp_clamp(r);
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            if ((r->used == 0) || (signA == signD)) {
                r->sign = MP_ZPOS;
            }
            else {
                r->sign = MP_NEG;
            }
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
        }
    }

    FREE_SP_INT_ARRAY(td, NULL);
    return err;
}
  8131. /* Divide a by d and return the quotient in r and the remainder in rem.
  8132. * r = a / d; rem = a % d
  8133. *
  8134. * @param [in] a SP integer to be divided.
  8135. * @param [in] d SP integer to divide by.
  8136. * @param [out] r SP integer that is the quotient.
  8137. * @param [out] rem SP integer that is the remainder.
  8138. *
  8139. * @return MP_OKAY on success.
  8140. * @return MP_VAL when a or d is NULL, r and rem are NULL, or d is 0.
  8141. * @return MP_MEM when dynamic memory allocation fails.
  8142. */
  8143. int sp_div(const sp_int* a, const sp_int* d, sp_int* r, sp_int* rem)
  8144. {
  8145. int err = MP_OKAY;
  8146. unsigned int used = 1;
  8147. /* Validate parameters. */
  8148. if ((a == NULL) || (d == NULL) || ((r == NULL) && (rem == NULL))) {
  8149. err = MP_VAL;
  8150. }
  8151. /* a / 0 = infinity. */
  8152. if ((err == MP_OKAY) && sp_iszero(d)) {
  8153. err = MP_VAL;
  8154. }
  8155. /* Ensure quotient result has enough memory. */
  8156. if ((err == MP_OKAY) && (r != NULL) && (r->size < a->used - d->used + 2)) {
  8157. err = MP_VAL;
  8158. }
  8159. if ((err == MP_OKAY) && (rem != NULL)) {
  8160. /* Ensure remainder has enough memory. */
  8161. if ((a->used <= d->used) && (rem->size < a->used + 1)) {
  8162. err = MP_VAL;
  8163. }
  8164. else if ((a->used > d->used) && (rem->size < d->used + 1)) {
  8165. err = MP_VAL;
  8166. }
  8167. }
  8168. if (err == MP_OKAY) {
  8169. if (a->used == SP_INT_DIGITS) {
  8170. /* May need to shift number being divided left into a new word. */
  8171. int bits = SP_WORD_SIZE - (sp_count_bits(d) % SP_WORD_SIZE);
  8172. if ((bits != SP_WORD_SIZE) &&
  8173. (sp_count_bits(a) + bits > SP_INT_DIGITS * SP_WORD_SIZE)) {
  8174. err = MP_VAL;
  8175. }
  8176. else {
  8177. used = SP_INT_DIGITS;
  8178. }
  8179. }
  8180. else {
  8181. used = a->used + 1;
  8182. }
  8183. }
  8184. if (err == MP_OKAY) {
  8185. #if 0
  8186. sp_print(a, "a");
  8187. sp_print(d, "b");
  8188. #endif
  8189. /* Do operation. */
  8190. err = _sp_div(a, d, r, rem, used);
  8191. #if 0
  8192. if (err == MP_OKAY) {
  8193. if (rem != NULL) {
  8194. sp_print(rem, "rdr");
  8195. }
  8196. if (r != NULL) {
  8197. sp_print(r, "rdw");
  8198. }
  8199. }
  8200. #endif
  8201. }
  8202. return err;
  8203. }
  8204. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  8205. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  8206. #if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(HAVE_ECC) || \
  8207. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  8208. !defined(WOLFSSL_RSA_PUBLIC_ONLY))
  8209. #ifndef FREESCALE_LTC_TFM
  8210. #ifdef WOLFSSL_SP_INT_NEGATIVE
/* Calculate the remainder of dividing a by m: r = a mod m.
 *
 * Variant used when r aliases m - works in a temporary so the modulus is
 * still intact for the sign fix-up after the division.
 *
 * @param [in]  a  SP integer to reduce.
 * @param [in]  m  SP integer that is the modulus.
 * @param [out] r  SP integer to store result in (may be same object as m).
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    /* Remainder will start as a - needs a->used + 1 digits. */
    DECL_SP_INT(t, (a == NULL) ? 1 : a->used + 1);

    /* In case remainder is modulus - allocate temporary. */
    ALLOC_SP_INT(t, a->used + 1, err, NULL);
    if (err == MP_OKAY) {
        _sp_init_size(t, a->used + 1);
        /* Use divide to calculate remainder and don't get quotient. */
        err = sp_div(a, m, NULL, t);
    }
    if (err == MP_OKAY) {
        /* Make remainder positive and copy into result.
         * Sign of result must match sign of modulus. */
        if ((!sp_iszero(t)) && (t->sign != m->sign)) {
            err = sp_add(t, m, r);
        }
        else {
            _sp_copy(t, r);
        }
    }

    FREE_SP_INT(t, NULL);
    return err;
}
  8244. #endif
/* Calculate the remainder of dividing a by m: r = a mod m.
 *
 * @param [in]  a  SP integer to reduce.
 * @param [in]  m  SP integer that is the modulus.
 * @param [out] r  SP integer to store result in.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL or m is 0.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_mod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Ensure a isn't too big a number to operate on. */
    else if (a->used >= SP_INT_DIGITS) {
        err = MP_VAL;
    }

#ifndef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* Use divide to calculate remainder and don't get quotient. */
        err = sp_div(a, m, NULL, r);
    }
#else
    if ((err == MP_OKAY) && (r != m)) {
        /* Result distinct from modulus - divide directly into r. */
        err = sp_div(a, m, NULL, r);
        /* Make remainder have the same sign as the modulus. */
        if ((err == MP_OKAY) && (!sp_iszero(r)) && (r->sign != m->sign)) {
            err = sp_add(r, m, r);
        }
    }
    else if (err == MP_OKAY) {
        /* Result aliases modulus - use helper that works in a temporary. */
        err = _sp_mod(a, m, r);
    }
#endif /* WOLFSSL_SP_INT_NEGATIVE */

    return err;
}
  8284. #endif /* !FREESCALE_LTC_TFM */
  8285. #endif /* WOLFSSL_SP_MATH_ALL || !NO_DH || HAVE_ECC || \
  8286. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  8287. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  8288. defined(HAVE_ECC) || !defined(NO_RSA)
  8289. /* START SP_MUL implementations. */
  8290. /* This code is generated.
  8291. * To generate:
  8292. * cd scripts/sp/sp_int
  8293. * ./gen.sh
  8294. * File sp_mul.c contains code.
  8295. */
  8296. #ifdef SQR_MUL_ASM
/* Multiply a by b into r where a and b have the same number of digits.
 * r = a * b
 *
 * Optimised Comba-style code for when the number of digits in a and b are
 * the same. The low a->used digits of the product are accumulated in a
 * temporary so that r may alias a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by (same digit count as a).
 * @param [out] r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_nxn(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 VLA sized exactly for the low half of the product. */
    sp_int_digit t[a->used];
#else
    sp_int_digit t[SP_INT_DIGITS / 2];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * a->used, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_digit l;    /* Low accumulator digit. */
        sp_int_digit h;    /* High accumulator digit. */
        sp_int_digit o;    /* Overflow (carry) accumulator digit. */
        const sp_int_digit* dp;

        h = 0;
        l = 0;
        /* First column: a[0] * b[0]. */
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Low columns - each column k sums a[k-j] * b[j]. */
        for (k = 1; k <= a->used - 1; k++) {
            j = (int)k;
            dp = a->dp;
            for (; j >= 0; dp++, j--) {
                SP_ASM_MUL_ADD(l, h, o, dp[0], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* High columns - written directly to result. */
        for (; k <= (a->used - 1) * 2; k++) {
            i = k - (b->used - 1);
            dp = &b->dp[b->used - 1];
            for (; i < a->used; i++, dp--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], dp[0]);
            }
            r->dp[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Top digit then copy the cached low half into result. */
        r->dp[k] = l;
        XMEMCPY(r->dp, t, a->used * sizeof(sp_int_digit));
        r->used = k + 1;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
/* Multiply a by b into r. r = a * b
 *
 * Comba-style multiplication using assembly/builtin word primitives.
 * The full product is accumulated in a temporary so that r may alias
 * a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 VLA sized exactly for the product. */
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_digit l;    /* Low accumulator digit. */
        sp_int_digit h;    /* High accumulator digit. */
        sp_int_digit o;    /* Overflow (carry) accumulator digit. */

        h = 0;
        l = 0;
        /* First column: a[0] * b[0]. */
        SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* Columns where j starts at k (limited by digits of b). */
        for (k = 1; k <= b->used - 1; k++) {
            i = 0;
            j = (int)k;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Remaining columns - j starts at top digit of b. */
        for (; k <= (a->used - 1) + (b->used - 1); k++) {
            j = (int)(b->used - 1);
            i = k - (unsigned int)j;
            for (; (i < a->used) && (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD(l, h, o, a->dp[i], b->dp[j]);
            }
            t[k] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Final carry digit then copy whole product into result. */
        t[k] = l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8448. #else
/* Multiply a by b into r. r = a * b
 *
 * Comba-style multiplication using double-width word arithmetic
 * (sp_int_word). The full product is accumulated in a temporary so that
 * r may alias a or b.
 *
 * @param [in]  a  SP integer to multiply.
 * @param [in]  b  SP integer to multiply by.
 * @param [out] r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* C99 VLA sized exactly for the product. */
    sp_int_digit t[a->used + b->used];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used + b->used), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_word w;    /* Double-width product of two digits. */
        sp_int_word l;    /* Low word of column accumulator. */
        sp_int_word h;    /* High word of column accumulator. */
    #ifdef SP_WORD_OVERFLOW
        sp_int_word o;    /* Extra overflow word for very large digit counts. */
    #endif

        /* First column: a[0] * b[0]. */
        w = (sp_int_word)a->dp[0] * b->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
    #ifdef SP_WORD_OVERFLOW
        o = 0;
    #endif
        /* Column k sums a[i] * b[j] for all i + j == k. */
        for (k = 1; k <= (a->used - 1) + (b->used - 1); k++) {
            i = k - (b->used - 1);
            /* Branch-free clamp: i = 0 when k < b->used - 1 (wrapped). */
            i &= (((unsigned int)i >> (sizeof(i) * 8 - 1)) - 1U);
            j = (int)(k - i);
            for (; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * b->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
            #ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
            #endif
            }
            /* Emit column digit and shift accumulator down one digit. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
        #ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
        #endif
        }
        /* Final carry digit then copy whole product into result. */
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8529. #endif
  8530. #ifndef WOLFSSL_SP_SMALL
  8531. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  8532. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
  8533. #ifndef SQR_MUL_ASM
/* Multiply a by b and store in r: r = a * b
 *
 * Long-hand implementation for 4-digit operands: all 16 digit products are
 * computed up front, then summed column by column with w[0] reused as the
 * running carry accumulator.
 *
 * @param [in]  a  SP integer to multiply (4 digits).
 * @param [in]  b  SP integer to multiply (4 digits).
 * @param [out] r  SP integer result (up to 8 digits).
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[16];
#endif
    const sp_int_digit* da = a->dp;
    const sp_int_digit* db = b->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* All 16 digit products, grouped by column (i + j). */
        w[0] = (sp_int_word)da[0] * db[0];
        w[1] = (sp_int_word)da[0] * db[1];
        w[2] = (sp_int_word)da[1] * db[0];
        w[3] = (sp_int_word)da[0] * db[2];
        w[4] = (sp_int_word)da[1] * db[1];
        w[5] = (sp_int_word)da[2] * db[0];
        w[6] = (sp_int_word)da[0] * db[3];
        w[7] = (sp_int_word)da[1] * db[2];
        w[8] = (sp_int_word)da[2] * db[1];
        w[9] = (sp_int_word)da[3] * db[0];
        w[10] = (sp_int_word)da[1] * db[3];
        w[11] = (sp_int_word)da[2] * db[2];
        w[12] = (sp_int_word)da[3] * db[1];
        w[13] = (sp_int_word)da[2] * db[3];
        w[14] = (sp_int_word)da[3] * db[2];
        w[15] = (sp_int_word)da[3] * db[3];

        /* Column 0. */
        r->dp[0] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 1: products with i + j == 1 plus carry. */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        r->dp[1] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 2: high halves of column 1 products plus i + j == 2. */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        r->dp[2] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 3: high halves of column 2 products plus i + j == 3. */
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[3] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 4: high halves of column 3 products plus i + j == 4. */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        w[0] += (sp_int_digit)w[10];
        w[0] += (sp_int_digit)w[11];
        w[0] += (sp_int_digit)w[12];
        r->dp[4] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 5: high halves of column 4 products plus i + j == 5. */
        w[10] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[10];
        w[11] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[11];
        w[12] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[12];
        w[0] += (sp_int_digit)w[13];
        w[0] += (sp_int_digit)w[14];
        r->dp[5] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 6: high halves of column 5 products plus a[3] * b[3]. */
        w[13] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[13];
        w[14] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[14];
        w[0] += (sp_int_digit)w[15];
        r->dp[6] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Column 7: high half of a[3] * b[3] plus final carry. */
        w[15] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[15];
        r->dp[7] = (sp_int_digit)w[0];
        r->used = 8;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  8649. #else /* SQR_MUL_ASM */
/* Multiply a by b and store in r: r = a * b
 *
 * Comba implementation for 4-digit operands using assembly/builtin word
 * primitives. Low four digits are cached in t so that r may alias a or b.
 *
 * @param [in]  a  SP integer to multiply (4 digits).
 * @param [in]  b  SP integer to multiply (4 digits).
 * @param [out] r  SP integer result (up to 8 digits).
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_mul_4(const sp_int* a, const sp_int* b, sp_int* r)
{
    sp_int_digit l = 0;    /* Low accumulator digit. */
    sp_int_digit h = 0;    /* High accumulator digit. */
    sp_int_digit o = 0;    /* Overflow (carry) accumulator digit. */
    sp_int_digit t[4];     /* Cache for the low digits of the product. */

    /* Column 0. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4 - written directly to result. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Columns 6 and 7. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[3], b->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Copy cached low digits into result. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    sp_clamp(r);

    return MP_OKAY;
}
  8711. #endif /* SQR_MUL_ASM */
  8712. #endif /* SP_WORD_SIZE == 64 */
  8713. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
  8714. #ifdef SQR_MUL_ASM
  8715. /* Multiply a by b and store in r: r = a * b
  8716. *
  8717. * Comba implementation.
  8718. *
  8719. * @param [in] a SP integer to multiply.
  8720. * @param [in] b SP integer to multiply.
  8721. * @param [out] r SP integer result.
  8722. *
  8723. * @return MP_OKAY on success.
  8724. * @return MP_MEM when dynamic memory allocation fails.
  8725. */
static int _sp_mul_6(const sp_int* a, const sp_int* b, sp_int* r)
{
    /* Comba column multiplication: result digit k is the sum of every
     * partial product a->dp[i] * b->dp[j] with i + j == k.  The running
     * column sum lives in a three-word accumulator:
     *   l = low word, h = high word, o = overflow (second carry) word.
     * After each column is emitted the window shifts down one word:
     *   l = h; h = o; o = 0; */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Low 6 result digits are staged in t[] and copied into r->dp only
     * after all input digits have been consumed — presumably so that r
     * may alias a and/or b (inputs occupy dp[0..5] only). */
    sp_int_digit t[6];
    /* Column 0: note SP_ASM_MUL writes the LOW product word into its
     * first argument (h) and the high word into the second (l). */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: the first add of a column cannot carry into o, so the
     * cheaper SP_ASM_MUL_ADD_NO form is used for it. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 6..11 are stored straight into r->dp: the inputs only
     * occupy dp[0..5], so these stores cannot clobber unread input
     * words even if r aliases a or b. */
    /* Column 6. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Column 10: last product; only a single add remains, so no o word
     * is needed.  Its high word is the final digit 11. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[5], b->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* All input digits consumed — safe to commit the staged low half. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    sp_clamp(r);
    return MP_OKAY;
}
  8812. #endif /* SQR_MUL_ASM */
  8813. #endif /* SP_WORD_SIZE == 64 */
  8814. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
  8815. #ifdef SQR_MUL_ASM
  8816. /* Multiply a by b and store in r: r = a * b
  8817. *
  8818. * Comba implementation.
  8819. *
  8820. * @param [in] a SP integer to multiply.
  8821. * @param [in] b SP integer to multiply.
  8822. * @param [out] r SP integer result.
  8823. *
  8824. * @return MP_OKAY on success.
* (MP_MEM is not possible here: this fixed-size variant performs no dynamic allocation.)
  8826. */
static int _sp_mul_8(const sp_int* a, const sp_int* b, sp_int* r)
{
    /* Comba column multiplication: result digit k is the sum of every
     * partial product a->dp[i] * b->dp[j] with i + j == k, accumulated
     * in a three-word window (l = low, h = high, o = overflow).  After
     * each column is emitted the window shifts: l = h; h = o; o = 0; */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Low 8 result digits are staged in t[] and copied at the end —
     * presumably so r may alias a and/or b (inputs use dp[0..7] only). */
    sp_int_digit t[8];
    /* Column 0: SP_ASM_MUL's first output (h) receives the LOW word. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: first add of a column cannot carry into o (ADD_NO). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 8..15 are stored straight into r->dp: inputs only occupy
     * dp[0..7], so these stores cannot clobber unread input words even
     * when r aliases a or b. */
    /* Column 8. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Column 14: last product; its high word is the final digit 15. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[7], b->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    /* All input digits consumed — safe to commit the staged low half. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    sp_clamp(r);
    return MP_OKAY;
}
  8957. #endif /* SQR_MUL_ASM */
  8958. #endif /* SP_WORD_SIZE == 32 */
  8959. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
  8960. #ifdef SQR_MUL_ASM
  8961. /* Multiply a by b and store in r: r = a * b
  8962. *
  8963. * Comba implementation.
  8964. *
  8965. * @param [in] a SP integer to multiply.
  8966. * @param [in] b SP integer to multiply.
  8967. * @param [out] r SP integer result.
  8968. *
  8969. * @return MP_OKAY on success.
* (MP_MEM is not possible here: this fixed-size variant performs no dynamic allocation.)
  8971. */
static int _sp_mul_12(const sp_int* a, const sp_int* b, sp_int* r)
{
    /* Comba column multiplication: result digit k is the sum of every
     * partial product a->dp[i] * b->dp[j] with i + j == k, accumulated
     * in a three-word window (l = low, h = high, o = overflow).  After
     * each column is emitted the window shifts: l = h; h = o; o = 0; */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Low 12 result digits are staged in t[] and copied at the end —
     * presumably so r may alias a and/or b (inputs use dp[0..11] only). */
    sp_int_digit t[12];
    /* Column 0: SP_ASM_MUL's first output (h) receives the LOW word. */
    SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: first add of a column cannot carry into o (ADD_NO). */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Columns 12..23 are stored straight into r->dp: inputs only occupy
     * dp[0..11], so these stores cannot clobber unread input words even
     * when r aliases a or b. */
    /* Column 12. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 14. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 15. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 16. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 17. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 18. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 19. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 20. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 21. */
    SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
    SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Column 22: last product; its high word is the final digit 23. */
    SP_ASM_MUL_ADD_NO(l, h, a->dp[11], b->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;
    /* All input digits consumed — safe to commit the staged low half. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    sp_clamp(r);
    return MP_OKAY;
}
  9214. #endif /* SQR_MUL_ASM */
  9215. #endif /* SP_WORD_SIZE == 32 */
  9216. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  9217. #if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
  9218. (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
  9219. (SP_WORD_SIZE == 64)))
  9220. #if SP_INT_DIGITS >= 32
  9221. /* Multiply a by b and store in r: r = a * b
  9222. *
  9223. * Comba implementation.
  9224. *
  9225. * @param [in] a SP integer to multiply.
  9226. * @param [in] b SP integer to multiply.
  9227. * @param [out] r SP integer result.
  9228. *
  9229. * @return MP_OKAY on success.
  9230. * @return MP_MEM when dynamic memory allocation fails.
  9231. */
  9232. static int _sp_mul_16(const sp_int* a, const sp_int* b, sp_int* r)
  9233. {
  9234. int err = MP_OKAY;
  9235. sp_int_digit l = 0;
  9236. sp_int_digit h = 0;
  9237. sp_int_digit o = 0;
  9238. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9239. sp_int_digit* t = NULL;
  9240. #else
  9241. sp_int_digit t[16];
  9242. #endif
  9243. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9244. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
  9245. DYNAMIC_TYPE_BIGINT);
  9246. if (t == NULL) {
  9247. err = MP_MEM;
  9248. }
  9249. #endif
  9250. if (err == MP_OKAY) {
  9251. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  9252. t[0] = h;
  9253. h = 0;
  9254. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  9255. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  9256. t[1] = l;
  9257. l = h;
  9258. h = o;
  9259. o = 0;
  9260. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  9261. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  9262. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  9263. t[2] = l;
  9264. l = h;
  9265. h = o;
  9266. o = 0;
  9267. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  9268. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  9269. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  9270. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  9271. t[3] = l;
  9272. l = h;
  9273. h = o;
  9274. o = 0;
  9275. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  9276. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  9277. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  9278. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  9279. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  9280. t[4] = l;
  9281. l = h;
  9282. h = o;
  9283. o = 0;
  9284. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  9285. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  9286. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  9287. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  9288. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  9289. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  9290. t[5] = l;
  9291. l = h;
  9292. h = o;
  9293. o = 0;
  9294. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  9295. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  9296. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  9297. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  9298. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  9299. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  9300. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  9301. t[6] = l;
  9302. l = h;
  9303. h = o;
  9304. o = 0;
  9305. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  9306. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  9307. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  9308. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  9309. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  9310. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  9311. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  9312. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  9313. t[7] = l;
  9314. l = h;
  9315. h = o;
  9316. o = 0;
  9317. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  9318. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  9319. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  9320. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  9321. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  9322. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  9323. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  9324. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  9325. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  9326. t[8] = l;
  9327. l = h;
  9328. h = o;
  9329. o = 0;
  9330. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  9331. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  9332. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  9333. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  9334. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  9335. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  9336. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  9337. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  9338. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  9339. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  9340. t[9] = l;
  9341. l = h;
  9342. h = o;
  9343. o = 0;
  9344. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  9345. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  9346. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  9347. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  9348. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  9349. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  9350. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  9351. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  9352. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  9353. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  9354. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  9355. t[10] = l;
  9356. l = h;
  9357. h = o;
  9358. o = 0;
  9359. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  9360. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  9361. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  9362. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  9363. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  9364. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  9365. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  9366. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  9367. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  9368. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  9369. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  9370. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  9371. t[11] = l;
  9372. l = h;
  9373. h = o;
  9374. o = 0;
  9375. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  9376. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  9377. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  9378. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  9379. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  9380. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  9381. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  9382. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  9383. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  9384. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  9385. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  9386. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  9387. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  9388. t[12] = l;
  9389. l = h;
  9390. h = o;
  9391. o = 0;
  9392. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  9393. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  9394. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  9395. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  9396. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  9397. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  9398. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  9399. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  9400. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  9401. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  9402. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  9403. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  9404. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  9405. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  9406. t[13] = l;
  9407. l = h;
  9408. h = o;
  9409. o = 0;
  9410. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  9411. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  9412. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  9413. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  9414. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  9415. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  9416. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  9417. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  9418. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  9419. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  9420. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  9421. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  9422. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  9423. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  9424. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  9425. t[14] = l;
  9426. l = h;
  9427. h = o;
  9428. o = 0;
  9429. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  9430. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  9431. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  9432. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  9433. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  9434. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  9435. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  9436. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  9437. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  9438. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  9439. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  9440. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  9441. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  9442. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  9443. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  9444. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  9445. t[15] = l;
  9446. l = h;
  9447. h = o;
  9448. o = 0;
  9449. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  9450. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  9451. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  9452. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  9453. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  9454. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  9455. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  9456. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  9457. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  9458. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  9459. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  9460. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  9461. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  9462. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  9463. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  9464. r->dp[16] = l;
  9465. l = h;
  9466. h = o;
  9467. o = 0;
  9468. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  9469. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  9470. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  9471. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  9472. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  9473. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  9474. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  9475. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  9476. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  9477. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  9478. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  9479. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  9480. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  9481. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  9482. r->dp[17] = l;
  9483. l = h;
  9484. h = o;
  9485. o = 0;
  9486. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  9487. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  9488. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  9489. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  9490. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  9491. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  9492. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  9493. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  9494. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  9495. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  9496. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  9497. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  9498. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  9499. r->dp[18] = l;
  9500. l = h;
  9501. h = o;
  9502. o = 0;
  9503. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  9504. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  9505. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  9506. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  9507. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  9508. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  9509. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  9510. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  9511. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  9512. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  9513. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  9514. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  9515. r->dp[19] = l;
  9516. l = h;
  9517. h = o;
  9518. o = 0;
  9519. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  9520. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  9521. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  9522. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  9523. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  9524. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  9525. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  9526. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  9527. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  9528. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  9529. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  9530. r->dp[20] = l;
  9531. l = h;
  9532. h = o;
  9533. o = 0;
  9534. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  9535. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  9536. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  9537. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  9538. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  9539. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  9540. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  9541. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  9542. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  9543. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  9544. r->dp[21] = l;
  9545. l = h;
  9546. h = o;
  9547. o = 0;
  9548. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  9549. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  9550. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  9551. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  9552. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  9553. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  9554. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  9555. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  9556. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  9557. r->dp[22] = l;
  9558. l = h;
  9559. h = o;
  9560. o = 0;
  9561. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  9562. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  9563. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  9564. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  9565. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  9566. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  9567. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  9568. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  9569. r->dp[23] = l;
  9570. l = h;
  9571. h = o;
  9572. o = 0;
  9573. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  9574. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  9575. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  9576. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  9577. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  9578. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  9579. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  9580. r->dp[24] = l;
  9581. l = h;
  9582. h = o;
  9583. o = 0;
  9584. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  9585. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  9586. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  9587. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  9588. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  9589. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  9590. r->dp[25] = l;
  9591. l = h;
  9592. h = o;
  9593. o = 0;
  9594. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  9595. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  9596. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  9597. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  9598. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  9599. r->dp[26] = l;
  9600. l = h;
  9601. h = o;
  9602. o = 0;
  9603. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  9604. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  9605. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  9606. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  9607. r->dp[27] = l;
  9608. l = h;
  9609. h = o;
  9610. o = 0;
  9611. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  9612. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  9613. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  9614. r->dp[28] = l;
  9615. l = h;
  9616. h = o;
  9617. o = 0;
  9618. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  9619. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  9620. r->dp[29] = l;
  9621. l = h;
  9622. h = o;
  9623. SP_ASM_MUL_ADD_NO(l, h, a->dp[15], b->dp[15]);
  9624. r->dp[30] = l;
  9625. r->dp[31] = h;
  9626. XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
  9627. r->used = 32;
  9628. sp_clamp(r);
  9629. }
  9630. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9631. if (t != NULL) {
  9632. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  9633. }
  9634. #endif
  9635. return err;
  9636. }
  9637. #endif /* SP_INT_DIGITS >= 32 */
  9638. #endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
  9639. * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64) */
  9640. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  9641. #if SP_INT_DIGITS >= 48
  9642. /* Multiply a by b and store in r: r = a * b
  9643. *
  9644. * Comba implementation.
  9645. *
  9646. * @param [in] a SP integer to multiply.
  9647. * @param [in] b SP integer to multiply.
  9648. * @param [out] r SP integer result.
  9649. *
  9650. * @return MP_OKAY on success.
  9651. * @return MP_MEM when dynamic memory allocation fails.
  9652. */
  9653. static int _sp_mul_24(const sp_int* a, const sp_int* b, sp_int* r)
  9654. {
  9655. int err = MP_OKAY;
  9656. sp_int_digit l = 0;
  9657. sp_int_digit h = 0;
  9658. sp_int_digit o = 0;
  9659. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9660. sp_int_digit* t = NULL;
  9661. #else
  9662. sp_int_digit t[24];
  9663. #endif
  9664. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  9665. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  9666. DYNAMIC_TYPE_BIGINT);
  9667. if (t == NULL) {
  9668. err = MP_MEM;
  9669. }
  9670. #endif
  9671. if (err == MP_OKAY) {
  9672. SP_ASM_MUL(h, l, a->dp[0], b->dp[0]);
  9673. t[0] = h;
  9674. h = 0;
  9675. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[1]);
  9676. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[0]);
  9677. t[1] = l;
  9678. l = h;
  9679. h = o;
  9680. o = 0;
  9681. SP_ASM_MUL_ADD_NO(l, h, a->dp[0], b->dp[2]);
  9682. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[1]);
  9683. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[0]);
  9684. t[2] = l;
  9685. l = h;
  9686. h = o;
  9687. o = 0;
  9688. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[3]);
  9689. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[2]);
  9690. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[1]);
  9691. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[0]);
  9692. t[3] = l;
  9693. l = h;
  9694. h = o;
  9695. o = 0;
  9696. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[4]);
  9697. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[3]);
  9698. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[2]);
  9699. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[1]);
  9700. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[0]);
  9701. t[4] = l;
  9702. l = h;
  9703. h = o;
  9704. o = 0;
  9705. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[5]);
  9706. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[4]);
  9707. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[3]);
  9708. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[2]);
  9709. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[1]);
  9710. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[0]);
  9711. t[5] = l;
  9712. l = h;
  9713. h = o;
  9714. o = 0;
  9715. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[6]);
  9716. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[5]);
  9717. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[4]);
  9718. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[3]);
  9719. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[2]);
  9720. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[1]);
  9721. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[0]);
  9722. t[6] = l;
  9723. l = h;
  9724. h = o;
  9725. o = 0;
  9726. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[7]);
  9727. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[6]);
  9728. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[5]);
  9729. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[4]);
  9730. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[3]);
  9731. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[2]);
  9732. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[1]);
  9733. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[0]);
  9734. t[7] = l;
  9735. l = h;
  9736. h = o;
  9737. o = 0;
  9738. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[8]);
  9739. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[7]);
  9740. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[6]);
  9741. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[5]);
  9742. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[4]);
  9743. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[3]);
  9744. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[2]);
  9745. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[1]);
  9746. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[0]);
  9747. t[8] = l;
  9748. l = h;
  9749. h = o;
  9750. o = 0;
  9751. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[9]);
  9752. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[8]);
  9753. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[7]);
  9754. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[6]);
  9755. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[5]);
  9756. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[4]);
  9757. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[3]);
  9758. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[2]);
  9759. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[1]);
  9760. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[0]);
  9761. t[9] = l;
  9762. l = h;
  9763. h = o;
  9764. o = 0;
  9765. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[10]);
  9766. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[9]);
  9767. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[8]);
  9768. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[7]);
  9769. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[6]);
  9770. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[5]);
  9771. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[4]);
  9772. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[3]);
  9773. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[2]);
  9774. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[1]);
  9775. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[0]);
  9776. t[10] = l;
  9777. l = h;
  9778. h = o;
  9779. o = 0;
  9780. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[11]);
  9781. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[10]);
  9782. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[9]);
  9783. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[8]);
  9784. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[7]);
  9785. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[6]);
  9786. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[5]);
  9787. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[4]);
  9788. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[3]);
  9789. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[2]);
  9790. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[1]);
  9791. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[0]);
  9792. t[11] = l;
  9793. l = h;
  9794. h = o;
  9795. o = 0;
  9796. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[12]);
  9797. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[11]);
  9798. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[10]);
  9799. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[9]);
  9800. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[8]);
  9801. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[7]);
  9802. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[6]);
  9803. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[5]);
  9804. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[4]);
  9805. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[3]);
  9806. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[2]);
  9807. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[1]);
  9808. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[0]);
  9809. t[12] = l;
  9810. l = h;
  9811. h = o;
  9812. o = 0;
  9813. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[13]);
  9814. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[12]);
  9815. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[11]);
  9816. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[10]);
  9817. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[9]);
  9818. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[8]);
  9819. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[7]);
  9820. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[6]);
  9821. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[5]);
  9822. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[4]);
  9823. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[3]);
  9824. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[2]);
  9825. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[1]);
  9826. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[0]);
  9827. t[13] = l;
  9828. l = h;
  9829. h = o;
  9830. o = 0;
  9831. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[14]);
  9832. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[13]);
  9833. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[12]);
  9834. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[11]);
  9835. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[10]);
  9836. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[9]);
  9837. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[8]);
  9838. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[7]);
  9839. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[6]);
  9840. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[5]);
  9841. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[4]);
  9842. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[3]);
  9843. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[2]);
  9844. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[1]);
  9845. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[0]);
  9846. t[14] = l;
  9847. l = h;
  9848. h = o;
  9849. o = 0;
  9850. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[15]);
  9851. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[14]);
  9852. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[13]);
  9853. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[12]);
  9854. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[11]);
  9855. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[10]);
  9856. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[9]);
  9857. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[8]);
  9858. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[7]);
  9859. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[6]);
  9860. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[5]);
  9861. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[4]);
  9862. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[3]);
  9863. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[2]);
  9864. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[1]);
  9865. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[0]);
  9866. t[15] = l;
  9867. l = h;
  9868. h = o;
  9869. o = 0;
  9870. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[16]);
  9871. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[15]);
  9872. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[14]);
  9873. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[13]);
  9874. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[12]);
  9875. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[11]);
  9876. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[10]);
  9877. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[9]);
  9878. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[8]);
  9879. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[7]);
  9880. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[6]);
  9881. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[5]);
  9882. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[4]);
  9883. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[3]);
  9884. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[2]);
  9885. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[1]);
  9886. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[0]);
  9887. t[16] = l;
  9888. l = h;
  9889. h = o;
  9890. o = 0;
  9891. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[17]);
  9892. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[16]);
  9893. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[15]);
  9894. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[14]);
  9895. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[13]);
  9896. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[12]);
  9897. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[11]);
  9898. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[10]);
  9899. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[9]);
  9900. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[8]);
  9901. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[7]);
  9902. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[6]);
  9903. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[5]);
  9904. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[4]);
  9905. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[3]);
  9906. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[2]);
  9907. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[1]);
  9908. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[0]);
  9909. t[17] = l;
  9910. l = h;
  9911. h = o;
  9912. o = 0;
  9913. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[18]);
  9914. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[17]);
  9915. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[16]);
  9916. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[15]);
  9917. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[14]);
  9918. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[13]);
  9919. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[12]);
  9920. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[11]);
  9921. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[10]);
  9922. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[9]);
  9923. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[8]);
  9924. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[7]);
  9925. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[6]);
  9926. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[5]);
  9927. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[4]);
  9928. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[3]);
  9929. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[2]);
  9930. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[1]);
  9931. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[0]);
  9932. t[18] = l;
  9933. l = h;
  9934. h = o;
  9935. o = 0;
  9936. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[19]);
  9937. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[18]);
  9938. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[17]);
  9939. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[16]);
  9940. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[15]);
  9941. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[14]);
  9942. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[13]);
  9943. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[12]);
  9944. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[11]);
  9945. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[10]);
  9946. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[9]);
  9947. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[8]);
  9948. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[7]);
  9949. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[6]);
  9950. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[5]);
  9951. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[4]);
  9952. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[3]);
  9953. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[2]);
  9954. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[1]);
  9955. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[0]);
  9956. t[19] = l;
  9957. l = h;
  9958. h = o;
  9959. o = 0;
  9960. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[20]);
  9961. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[19]);
  9962. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[18]);
  9963. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[17]);
  9964. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[16]);
  9965. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[15]);
  9966. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[14]);
  9967. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[13]);
  9968. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[12]);
  9969. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[11]);
  9970. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[10]);
  9971. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[9]);
  9972. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[8]);
  9973. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[7]);
  9974. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[6]);
  9975. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[5]);
  9976. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[4]);
  9977. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[3]);
  9978. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[2]);
  9979. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[1]);
  9980. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[0]);
  9981. t[20] = l;
  9982. l = h;
  9983. h = o;
  9984. o = 0;
  9985. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[21]);
  9986. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[20]);
  9987. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[19]);
  9988. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[18]);
  9989. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[17]);
  9990. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[16]);
  9991. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[15]);
  9992. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[14]);
  9993. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[13]);
  9994. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[12]);
  9995. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[11]);
  9996. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[10]);
  9997. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[9]);
  9998. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[8]);
  9999. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[7]);
  10000. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[6]);
  10001. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[5]);
  10002. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[4]);
  10003. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[3]);
  10004. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[2]);
  10005. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[1]);
  10006. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[0]);
  10007. t[21] = l;
  10008. l = h;
  10009. h = o;
  10010. o = 0;
  10011. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[22]);
  10012. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[21]);
  10013. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[20]);
  10014. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[19]);
  10015. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[18]);
  10016. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[17]);
  10017. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[16]);
  10018. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[15]);
  10019. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[14]);
  10020. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[13]);
  10021. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[12]);
  10022. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[11]);
  10023. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[10]);
  10024. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[9]);
  10025. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[8]);
  10026. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[7]);
  10027. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[6]);
  10028. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[5]);
  10029. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[4]);
  10030. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[3]);
  10031. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[2]);
  10032. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[1]);
  10033. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[0]);
  10034. t[22] = l;
  10035. l = h;
  10036. h = o;
  10037. o = 0;
  10038. SP_ASM_MUL_ADD(l, h, o, a->dp[0], b->dp[23]);
  10039. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[22]);
  10040. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[21]);
  10041. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[20]);
  10042. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[19]);
  10043. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[18]);
  10044. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[17]);
  10045. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[16]);
  10046. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[15]);
  10047. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[14]);
  10048. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[13]);
  10049. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[12]);
  10050. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[11]);
  10051. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[10]);
  10052. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[9]);
  10053. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[8]);
  10054. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[7]);
  10055. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[6]);
  10056. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[5]);
  10057. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[4]);
  10058. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[3]);
  10059. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[2]);
  10060. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[1]);
  10061. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[0]);
  10062. t[23] = l;
  10063. l = h;
  10064. h = o;
  10065. o = 0;
  10066. SP_ASM_MUL_ADD(l, h, o, a->dp[1], b->dp[23]);
  10067. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[22]);
  10068. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[21]);
  10069. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[20]);
  10070. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[19]);
  10071. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[18]);
  10072. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[17]);
  10073. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[16]);
  10074. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[15]);
  10075. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[14]);
  10076. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[13]);
  10077. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[12]);
  10078. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[11]);
  10079. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[10]);
  10080. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[9]);
  10081. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[8]);
  10082. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[7]);
  10083. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[6]);
  10084. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[5]);
  10085. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[4]);
  10086. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[3]);
  10087. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[2]);
  10088. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[1]);
  10089. r->dp[24] = l;
  10090. l = h;
  10091. h = o;
  10092. o = 0;
  10093. SP_ASM_MUL_ADD(l, h, o, a->dp[2], b->dp[23]);
  10094. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[22]);
  10095. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[21]);
  10096. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[20]);
  10097. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[19]);
  10098. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[18]);
  10099. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[17]);
  10100. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[16]);
  10101. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[15]);
  10102. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[14]);
  10103. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[13]);
  10104. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[12]);
  10105. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[11]);
  10106. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[10]);
  10107. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[9]);
  10108. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[8]);
  10109. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[7]);
  10110. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[6]);
  10111. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[5]);
  10112. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[4]);
  10113. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[3]);
  10114. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[2]);
  10115. r->dp[25] = l;
  10116. l = h;
  10117. h = o;
  10118. o = 0;
  10119. SP_ASM_MUL_ADD(l, h, o, a->dp[3], b->dp[23]);
  10120. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[22]);
  10121. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[21]);
  10122. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[20]);
  10123. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[19]);
  10124. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[18]);
  10125. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[17]);
  10126. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[16]);
  10127. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[15]);
  10128. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[14]);
  10129. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[13]);
  10130. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[12]);
  10131. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[11]);
  10132. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[10]);
  10133. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[9]);
  10134. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[8]);
  10135. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[7]);
  10136. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[6]);
  10137. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[5]);
  10138. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[4]);
  10139. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[3]);
  10140. r->dp[26] = l;
  10141. l = h;
  10142. h = o;
  10143. o = 0;
  10144. SP_ASM_MUL_ADD(l, h, o, a->dp[4], b->dp[23]);
  10145. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[22]);
  10146. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[21]);
  10147. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[20]);
  10148. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[19]);
  10149. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[18]);
  10150. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[17]);
  10151. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[16]);
  10152. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[15]);
  10153. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[14]);
  10154. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[13]);
  10155. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[12]);
  10156. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[11]);
  10157. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[10]);
  10158. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[9]);
  10159. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[8]);
  10160. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[7]);
  10161. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[6]);
  10162. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[5]);
  10163. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[4]);
  10164. r->dp[27] = l;
  10165. l = h;
  10166. h = o;
  10167. o = 0;
  10168. SP_ASM_MUL_ADD(l, h, o, a->dp[5], b->dp[23]);
  10169. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[22]);
  10170. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[21]);
  10171. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[20]);
  10172. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[19]);
  10173. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[18]);
  10174. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[17]);
  10175. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[16]);
  10176. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[15]);
  10177. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[14]);
  10178. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[13]);
  10179. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[12]);
  10180. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[11]);
  10181. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[10]);
  10182. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[9]);
  10183. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[8]);
  10184. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[7]);
  10185. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[6]);
  10186. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[5]);
  10187. r->dp[28] = l;
  10188. l = h;
  10189. h = o;
  10190. o = 0;
  10191. SP_ASM_MUL_ADD(l, h, o, a->dp[6], b->dp[23]);
  10192. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[22]);
  10193. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[21]);
  10194. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[20]);
  10195. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[19]);
  10196. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[18]);
  10197. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[17]);
  10198. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[16]);
  10199. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[15]);
  10200. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[14]);
  10201. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[13]);
  10202. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[12]);
  10203. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[11]);
  10204. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[10]);
  10205. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[9]);
  10206. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[8]);
  10207. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[7]);
  10208. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[6]);
  10209. r->dp[29] = l;
  10210. l = h;
  10211. h = o;
  10212. o = 0;
  10213. SP_ASM_MUL_ADD(l, h, o, a->dp[7], b->dp[23]);
  10214. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[22]);
  10215. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[21]);
  10216. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[20]);
  10217. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[19]);
  10218. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[18]);
  10219. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[17]);
  10220. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[16]);
  10221. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[15]);
  10222. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[14]);
  10223. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[13]);
  10224. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[12]);
  10225. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[11]);
  10226. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[10]);
  10227. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[9]);
  10228. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[8]);
  10229. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[7]);
  10230. r->dp[30] = l;
  10231. l = h;
  10232. h = o;
  10233. o = 0;
  10234. SP_ASM_MUL_ADD(l, h, o, a->dp[8], b->dp[23]);
  10235. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[22]);
  10236. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[21]);
  10237. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[20]);
  10238. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[19]);
  10239. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[18]);
  10240. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[17]);
  10241. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[16]);
  10242. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[15]);
  10243. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[14]);
  10244. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[13]);
  10245. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[12]);
  10246. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[11]);
  10247. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[10]);
  10248. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[9]);
  10249. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[8]);
  10250. r->dp[31] = l;
  10251. l = h;
  10252. h = o;
  10253. o = 0;
  10254. SP_ASM_MUL_ADD(l, h, o, a->dp[9], b->dp[23]);
  10255. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[22]);
  10256. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[21]);
  10257. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[20]);
  10258. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[19]);
  10259. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[18]);
  10260. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[17]);
  10261. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[16]);
  10262. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[15]);
  10263. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[14]);
  10264. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[13]);
  10265. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[12]);
  10266. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[11]);
  10267. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[10]);
  10268. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[9]);
  10269. r->dp[32] = l;
  10270. l = h;
  10271. h = o;
  10272. o = 0;
  10273. SP_ASM_MUL_ADD(l, h, o, a->dp[10], b->dp[23]);
  10274. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[22]);
  10275. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[21]);
  10276. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[20]);
  10277. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[19]);
  10278. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[18]);
  10279. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[17]);
  10280. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[16]);
  10281. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[15]);
  10282. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[14]);
  10283. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[13]);
  10284. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[12]);
  10285. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[11]);
  10286. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[10]);
  10287. r->dp[33] = l;
  10288. l = h;
  10289. h = o;
  10290. o = 0;
  10291. SP_ASM_MUL_ADD(l, h, o, a->dp[11], b->dp[23]);
  10292. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[22]);
  10293. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[21]);
  10294. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[20]);
  10295. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[19]);
  10296. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[18]);
  10297. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[17]);
  10298. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[16]);
  10299. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[15]);
  10300. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[14]);
  10301. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[13]);
  10302. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[12]);
  10303. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[11]);
  10304. r->dp[34] = l;
  10305. l = h;
  10306. h = o;
  10307. o = 0;
  10308. SP_ASM_MUL_ADD(l, h, o, a->dp[12], b->dp[23]);
  10309. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[22]);
  10310. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[21]);
  10311. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[20]);
  10312. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[19]);
  10313. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[18]);
  10314. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[17]);
  10315. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[16]);
  10316. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[15]);
  10317. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[14]);
  10318. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[13]);
  10319. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[12]);
  10320. r->dp[35] = l;
  10321. l = h;
  10322. h = o;
  10323. o = 0;
  10324. SP_ASM_MUL_ADD(l, h, o, a->dp[13], b->dp[23]);
  10325. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[22]);
  10326. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[21]);
  10327. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[20]);
  10328. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[19]);
  10329. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[18]);
  10330. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[17]);
  10331. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[16]);
  10332. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[15]);
  10333. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[14]);
  10334. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[13]);
  10335. r->dp[36] = l;
  10336. l = h;
  10337. h = o;
  10338. o = 0;
  10339. SP_ASM_MUL_ADD(l, h, o, a->dp[14], b->dp[23]);
  10340. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[22]);
  10341. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[21]);
  10342. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[20]);
  10343. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[19]);
  10344. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[18]);
  10345. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[17]);
  10346. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[16]);
  10347. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[15]);
  10348. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[14]);
  10349. r->dp[37] = l;
  10350. l = h;
  10351. h = o;
  10352. o = 0;
  10353. SP_ASM_MUL_ADD(l, h, o, a->dp[15], b->dp[23]);
  10354. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[22]);
  10355. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[21]);
  10356. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[20]);
  10357. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[19]);
  10358. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[18]);
  10359. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[17]);
  10360. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[16]);
  10361. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[15]);
  10362. r->dp[38] = l;
  10363. l = h;
  10364. h = o;
  10365. o = 0;
  10366. SP_ASM_MUL_ADD(l, h, o, a->dp[16], b->dp[23]);
  10367. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[22]);
  10368. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[21]);
  10369. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[20]);
  10370. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[19]);
  10371. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[18]);
  10372. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[17]);
  10373. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[16]);
  10374. r->dp[39] = l;
  10375. l = h;
  10376. h = o;
  10377. o = 0;
  10378. SP_ASM_MUL_ADD(l, h, o, a->dp[17], b->dp[23]);
  10379. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[22]);
  10380. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[21]);
  10381. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[20]);
  10382. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[19]);
  10383. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[18]);
  10384. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[17]);
  10385. r->dp[40] = l;
  10386. l = h;
  10387. h = o;
  10388. o = 0;
  10389. SP_ASM_MUL_ADD(l, h, o, a->dp[18], b->dp[23]);
  10390. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[22]);
  10391. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[21]);
  10392. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[20]);
  10393. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[19]);
  10394. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[18]);
  10395. r->dp[41] = l;
  10396. l = h;
  10397. h = o;
  10398. o = 0;
  10399. SP_ASM_MUL_ADD(l, h, o, a->dp[19], b->dp[23]);
  10400. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[22]);
  10401. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[21]);
  10402. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[20]);
  10403. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[19]);
  10404. r->dp[42] = l;
  10405. l = h;
  10406. h = o;
  10407. o = 0;
  10408. SP_ASM_MUL_ADD(l, h, o, a->dp[20], b->dp[23]);
  10409. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[22]);
  10410. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[21]);
  10411. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[20]);
  10412. r->dp[43] = l;
  10413. l = h;
  10414. h = o;
  10415. o = 0;
  10416. SP_ASM_MUL_ADD(l, h, o, a->dp[21], b->dp[23]);
  10417. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[22]);
  10418. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[21]);
  10419. r->dp[44] = l;
  10420. l = h;
  10421. h = o;
  10422. o = 0;
  10423. SP_ASM_MUL_ADD(l, h, o, a->dp[22], b->dp[23]);
  10424. SP_ASM_MUL_ADD(l, h, o, a->dp[23], b->dp[22]);
  10425. r->dp[45] = l;
  10426. l = h;
  10427. h = o;
  10428. SP_ASM_MUL_ADD_NO(l, h, a->dp[23], b->dp[23]);
  10429. r->dp[46] = l;
  10430. r->dp[47] = h;
  10431. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  10432. r->used = 48;
  10433. sp_clamp(r);
  10434. }
  10435. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  10436. if (t != NULL) {
  10437. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  10438. }
  10439. #endif
  10440. return err;
  10441. }
  10442. #endif /* SP_INT_DIGITS >= 48 */
  10443. #if SP_INT_DIGITS >= 64
  10444. /* Multiply a by b and store in r: r = a * b
  10445. *
  10446. * Karatsuba implementation.
  10447. *
  10448. * @param [in] a SP integer to multiply.
  10449. * @param [in] b SP integer to multiply.
  10450. * @param [out] r SP integer result.
  10451. *
  10452. * @return MP_OKAY on success.
  10453. * @return MP_MEM when dynamic memory allocation fails.
  10454. */
static int _sp_mul_32(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;    /* Low word of the running column sum. */
    sp_int_digit h;    /* High word (carry) of the running column sum. */
    sp_int* a1;        /* High 16 digits of a; later holds a0 + a1. */
    sp_int* b1;        /* High 16 digits of b; later holds b0 + b1. */
    sp_int* z0;        /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;        /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;        /* z2 = a1 * b1. */
    sp_int_digit ca;   /* Carry out of the 16-digit sum a0 + a1. */
    sp_int_digit cb;   /* Carry out of the 16-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 16, 2);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT_ARRAY(t, 16, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* Low-half product is built directly in the result. */
        z0 = r;

        /* a1 = high 16 digits of a. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        a1->used = 16;
        /* b1 = high 16 digits of b. */
        XMEMCPY(b1->dp, &b->dp[16], sizeof(sp_int_digit) * 16);
        b1->used = 16;

        /* z2 = a1 * b1 */
        err = _sp_mul_16(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca). */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb). */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 16; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_16(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_16(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
        /* r = z0 : already in place because z0 aliases r. */
        /* The true middle product is (a01 + ca*2^(16w)) * (b01 + cb*2^(16w));
         * fold the carry cross-terms back into z1 starting at digit 16. */
        z1->dp[32] = ca & cb;
        l = 0;
        if (ca) {
            /* z1 += b01 << 16 (compensate for dropped carry of a0 + a1). */
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        l = 0;
        if (cb) {
            /* z1 += a01 << 16 (compensate for dropped carry of b0 + b1). */
            h = 0;
            for (i = 0; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow/carry into top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 16 */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* Upper digits of r not yet written - just carry plus z1 digits. */
        for (; i < 33; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 17; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        for (; i < 32; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Set maximum size and strip leading zero digits. */
        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10602. #endif /* SP_INT_DIGITS >= 64 */
  10603. #if SP_INT_DIGITS >= 96
  10604. /* Multiply a by b and store in r: r = a * b
  10605. *
  10606. * Karatsuba implementation.
  10607. *
  10608. * @param [in] a SP integer to multiply.
  10609. * @param [in] b SP integer to multiply.
  10610. * @param [out] r SP integer result.
  10611. *
  10612. * @return MP_OKAY on success.
  10613. * @return MP_MEM when dynamic memory allocation fails.
  10614. */
static int _sp_mul_48(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;    /* Low word of the running column sum. */
    sp_int_digit h;    /* High word (carry) of the running column sum. */
    sp_int* a1;        /* High 24 digits of a; later holds a0 + a1. */
    sp_int* b1;        /* High 24 digits of b; later holds b0 + b1. */
    sp_int* z0;        /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;        /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;        /* z2 = a1 * b1. */
    sp_int_digit ca;   /* Carry out of the 24-digit sum a0 + a1. */
    sp_int_digit cb;   /* Carry out of the 24-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 24, 2);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT_ARRAY(t, 24, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* Low-half product is built directly in the result. */
        z0 = r;

        /* a1 = high 24 digits of a. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        a1->used = 24;
        /* b1 = high 24 digits of b. */
        XMEMCPY(b1->dp, &b->dp[24], sizeof(sp_int_digit) * 24);
        b1->used = 24;

        /* z2 = a1 * b1 */
        err = _sp_mul_24(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca). */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb). */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 24; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_24(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_24(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
        /* r = z0 : already in place because z0 aliases r. */
        /* Fold the carry cross-terms of the truncated half sums back into
         * z1 starting at digit 24. */
        z1->dp[48] = ca & cb;
        l = 0;
        if (ca) {
            /* z1 += b01 << 24 (compensate for dropped carry of a0 + a1). */
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        l = 0;
        if (cb) {
            /* z1 += a01 << 24 (compensate for dropped carry of b0 + b1). */
            h = 0;
            for (i = 0; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow/carry into top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 24 */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* Upper digits of r not yet written - just carry plus z1 digits. */
        for (; i < 49; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 25; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        for (; i < 48; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Set maximum size and strip leading zero digits. */
        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10762. #endif /* SP_INT_DIGITS >= 96 */
  10763. #if SP_INT_DIGITS >= 128
  10764. /* Multiply a by b and store in r: r = a * b
  10765. *
  10766. * Karatsuba implementation.
  10767. *
  10768. * @param [in] a SP integer to multiply.
  10769. * @param [in] b SP integer to multiply.
  10770. * @param [out] r SP integer result.
  10771. *
  10772. * @return MP_OKAY on success.
  10773. * @return MP_MEM when dynamic memory allocation fails.
  10774. */
static int _sp_mul_64(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;    /* Low word of the running column sum. */
    sp_int_digit h;    /* High word (carry) of the running column sum. */
    sp_int* a1;        /* High 32 digits of a; later holds a0 + a1. */
    sp_int* b1;        /* High 32 digits of b; later holds b0 + b1. */
    sp_int* z0;        /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;        /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;        /* z2 = a1 * b1. */
    sp_int_digit ca;   /* Carry out of the 32-digit sum a0 + a1. */
    sp_int_digit cb;   /* Carry out of the 32-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 32, 2);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT_ARRAY(t, 32, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* Low-half product is built directly in the result. */
        z0 = r;

        /* a1 = high 32 digits of a. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        a1->used = 32;
        /* b1 = high 32 digits of b. */
        XMEMCPY(b1->dp, &b->dp[32], sizeof(sp_int_digit) * 32);
        b1->used = 32;

        /* z2 = a1 * b1 */
        err = _sp_mul_32(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca). */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb). */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_32(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_32(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
        /* r = z0 : already in place because z0 aliases r. */
        /* Fold the carry cross-terms of the truncated half sums back into
         * z1 starting at digit 32. */
        z1->dp[64] = ca & cb;
        l = 0;
        if (ca) {
            /* z1 += b01 << 32 (compensate for dropped carry of a0 + a1). */
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        l = 0;
        if (cb) {
            /* z1 += a01 << 32 (compensate for dropped carry of b0 + b1). */
            h = 0;
            for (i = 0; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 64; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow/carry into top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Upper digits of r not yet written - just carry plus z1 digits. */
        for (; i < 65; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 64 */
        l = 0;
        h = 0;
        for (i = 0; i < 33; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        for (; i < 64; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        /* Set maximum size and strip leading zero digits. */
        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  10922. #endif /* SP_INT_DIGITS >= 128 */
  10923. #if SP_INT_DIGITS >= 192
  10924. /* Multiply a by b and store in r: r = a * b
  10925. *
  10926. * Karatsuba implementation.
  10927. *
  10928. * @param [in] a SP integer to multiply.
  10929. * @param [in] b SP integer to multiply.
  10930. * @param [out] r SP integer result.
  10931. *
  10932. * @return MP_OKAY on success.
  10933. * @return MP_MEM when dynamic memory allocation fails.
  10934. */
static int _sp_mul_96(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;    /* Low word of the running column sum. */
    sp_int_digit h;    /* High word (carry) of the running column sum. */
    sp_int* a1;        /* High 48 digits of a; later holds a0 + a1. */
    sp_int* b1;        /* High 48 digits of b; later holds b0 + b1. */
    sp_int* z0;        /* z0 = a0 * b0 (aliases r). */
    sp_int* z1;        /* z1 = (a0 + a1) * (b0 + b1). */
    sp_int* z2;        /* z2 = a1 * b1. */
    sp_int_digit ca;   /* Carry out of the 48-digit sum a0 + a1. */
    sp_int_digit cb;   /* Carry out of the 48-digit sum b0 + b1. */
    DECL_SP_INT_ARRAY(t, 48, 2);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT_ARRAY(t, 48, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        a1 = t[0];
        b1 = t[1];
        z1 = z[0];
        z2 = z[1];
        /* Low-half product is built directly in the result. */
        z0 = r;

        /* a1 = high 48 digits of a. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        a1->used = 48;
        /* b1 = high 48 digits of b. */
        XMEMCPY(b1->dp, &b->dp[48], sizeof(sp_int_digit) * 48);
        b1->used = 48;

        /* z2 = a1 * b1 */
        err = _sp_mul_48(a1, b1, z2);
    }
    if (err == MP_OKAY) {
        /* a01 = a0 + a1 (in place in a1; final carry kept in ca). */
        l = a1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[0]);
        a1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* b01 = b0 + b1 (in place in b1; final carry kept in cb). */
        l = b1->dp[0];
        h = 0;
        SP_ASM_ADDC(l, h, b->dp[0]);
        b1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            SP_ASM_ADDC(l, h, b1->dp[i]);
            SP_ASM_ADDC(l, h, b->dp[i]);
            b1->dp[i] = l;
            l = h;
            h = 0;
        }
        cb = l;

        /* z0 = a0 * b0 */
        err = _sp_mul_48(a, b, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) * (b0 + b1) */
        err = _sp_mul_48(a1, b1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
        /* r = z0 : already in place because z0 aliases r. */
        /* Fold the carry cross-terms of the truncated half sums back into
         * z1 starting at digit 48. */
        z1->dp[96] = ca & cb;
        l = 0;
        if (ca) {
            /* z1 += b01 << 48 (compensate for dropped carry of a0 + a1). */
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, b1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        l = 0;
        if (cb) {
            /* z1 += a01 << 48 (compensate for dropped carry of b0 + b1). */
            h = 0;
            for (i = 0; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        /* z1 = z1 - z0 - z2 */
        l = 0;
        h = 0;
        for (i = 0; i < 96; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate final borrow/carry into top digit of z1. */
        z1->dp[i] += l;
        /* r += z1 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Upper digits of r not yet written - just carry plus z1 digits. */
        for (; i < 97; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 96 */
        l = 0;
        h = 0;
        for (i = 0; i < 49; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        for (; i < 96; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        /* Set maximum size and strip leading zero digits. */
        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  11082. #endif /* SP_INT_DIGITS >= 192 */
  11083. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  11084. #endif /* !WOLFSSL_SP_SMALL */
  11085. /* Multiply a by b and store in r: r = a * b
  11086. *
  11087. * @param [in] a SP integer to multiply.
  11088. * @param [in] b SP integer to multiply.
  11089. * @param [out] r SP integer result.
  11090. *
  11091. * @return MP_OKAY on success.
* @return MP_VAL when a, b or r is NULL; or the result will be too big for fixed
  11093. * data length.
  11094. * @return MP_MEM when dynamic memory allocation fails.
  11095. */
int sp_mul(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
#ifdef WOLFSSL_SP_INT_NEGATIVE
    unsigned int sign = MP_ZPOS;
#endif

    /* Validate parameters. */
    if ((a == NULL) || (b == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation. */
    if ((err == MP_OKAY) && (a->used + b->used > r->size)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
        sp_print(b, "b");
    }
#endif

    if (err == MP_OKAY) {
#ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Result is negative exactly when the operand signs differ. */
        sign = a->sign ^ b->sign;
#endif
        /* Multiplication by zero gives zero. */
        if ((a->used == 0) || (b->used == 0)) {
            _sp_zero(r);
        }
        else
        /* Dispatch on operand digit count to the hand-unrolled comba and
         * Karatsuba implementations compiled in for this configuration.
         * Note: each "else" below deliberately chains across the following
         * preprocessor-conditional "if". */
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
        if ((a->used == 4) && (b->used == 4)) {
            err = _sp_mul_4(a, b, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if ((a->used == 6) && (b->used == 6)) {
            err = _sp_mul_6(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
#ifdef SQR_MUL_ASM
        if ((a->used == 8) && (b->used == 8)) {
            err = _sp_mul_8(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if ((a->used == 12) && (b->used == 12)) {
            err = _sp_mul_12(a, b, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
    (SP_WORD_SIZE == 64)))
#if SP_INT_DIGITS >= 32
        if ((a->used == 16) && (b->used == 16)) {
            err = _sp_mul_16(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 48
        if ((a->used == 24) && (b->used == 24)) {
            err = _sp_mul_24(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if ((a->used == 32) && (b->used == 32)) {
            err = _sp_mul_32(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if ((a->used == 48) && (b->used == 48)) {
            err = _sp_mul_48(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if ((a->used == 64) && (b->used == 64)) {
            err = _sp_mul_64(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if ((a->used == 96) && (b->used == 96)) {
            err = _sp_mul_96(a, b, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
        /* Fall back to generic implementations for other sizes. */
#ifdef SQR_MUL_ASM
        if (a->used == b->used) {
            err = _sp_mul_nxn(a, b, r);
        }
        else
#endif
        {
            err = _sp_mul(a, b, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* Zero is always positive; otherwise apply the computed sign. */
        r->sign = (r->used == 0) ? MP_ZPOS : sign;
    }
#endif

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rmul");
    }
#endif

    return err;
}
  11222. /* END SP_MUL implementations. */
  11223. #endif
  11224. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  11225. defined(WOLFCRYPT_HAVE_ECCSI) || \
  11226. (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || defined(OPENSSL_ALL)
  11227. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11228. *
  11229. * @param [in] a SP integer to multiply.
  11230. * @param [in] b SP integer to multiply.
  11231. * @param [in] m SP integer that is the modulus.
  11232. * @param [out] r SP integer result.
  11233. *
  11234. * @return MP_OKAY on success.
  11235. * @return MP_MEM when dynamic memory allocation fails.
  11236. */
  11237. static int _sp_mulmod_tmp(const sp_int* a, const sp_int* b, const sp_int* m,
  11238. sp_int* r)
  11239. {
  11240. int err = MP_OKAY;
  11241. /* Create temporary for multiplication result. */
  11242. DECL_SP_INT(t, a->used + b->used);
  11243. ALLOC_SP_INT(t, a->used + b->used, err, NULL);
  11244. if (err == MP_OKAY) {
  11245. err = sp_init_size(t, a->used + b->used);
  11246. }
  11247. /* Multiply and reduce. */
  11248. if (err == MP_OKAY) {
  11249. err = sp_mul(a, b, t);
  11250. }
  11251. if (err == MP_OKAY) {
  11252. err = sp_mod(t, m, r);
  11253. }
  11254. /* Dispose of an allocated SP int. */
  11255. FREE_SP_INT(t, NULL);
  11256. return err;
  11257. }
  11258. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11259. *
  11260. * @param [in] a SP integer to multiply.
  11261. * @param [in] b SP integer to multiply.
  11262. * @param [in] m SP integer that is the modulus.
  11263. * @param [out] r SP integer result.
  11264. *
  11265. * @return MP_OKAY on success.
  11266. * @return MP_MEM when dynamic memory allocation fails.
  11267. */
  11268. static int _sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m,
  11269. sp_int* r)
  11270. {
  11271. int err = MP_OKAY;
  11272. /* Use r as intermediate result if not same as pointer m which is needed
  11273. * after first intermediate result.
  11274. */
  11275. if (r != m) {
  11276. /* Multiply and reduce. */
  11277. err = sp_mul(a, b, r);
  11278. if (err == MP_OKAY) {
  11279. err = sp_mod(r, m, r);
  11280. }
  11281. }
  11282. else {
  11283. /* Do operation using temporary. */
  11284. err = _sp_mulmod_tmp(a, b, m, r);
  11285. }
  11286. return err;
  11287. }
  11288. /* Multiply a by b mod m and store in r: r = (a * b) mod m
  11289. *
  11290. * @param [in] a SP integer to multiply.
  11291. * @param [in] b SP integer to multiply.
  11292. * @param [in] m SP integer that is the modulus.
  11293. * @param [out] r SP integer result.
  11294. *
  11295. * @return MP_OKAY on success.
  11296. * @return MP_VAL when a, b, m or r is NULL; m is 0; or a * b is too big for
  11297. * fixed data length.
  11298. * @return MP_MEM when dynamic memory allocation fails.
  11299. */
  11300. int sp_mulmod(const sp_int* a, const sp_int* b, const sp_int* m, sp_int* r)
  11301. {
  11302. int err = MP_OKAY;
  11303. /* Validate parameters. */
  11304. if ((a == NULL) || (b == NULL) || (m == NULL) || (r == NULL)) {
  11305. err = MP_VAL;
  11306. }
  11307. /* Ensure result SP int is big enough for intermediates. */
  11308. if ((err == MP_OKAY) && (r != m) && (a->used + b->used > r->size)) {
  11309. err = MP_VAL;
  11310. }
  11311. #if 0
  11312. if (err == 0) {
  11313. sp_print(a, "a");
  11314. sp_print(b, "b");
  11315. sp_print(m, "m");
  11316. }
  11317. #endif
  11318. if (err == MP_OKAY) {
  11319. err = _sp_mulmod(a, b, m, r);
  11320. }
  11321. #if 0
  11322. if (err == 0) {
  11323. sp_print(r, "rmm");
  11324. }
  11325. #endif
  11326. return err;
  11327. }
  11328. #endif
  11329. #ifdef WOLFSSL_SP_INVMOD
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
 * Right-shift Algorithm. NOT constant time.
 *
 * Algorithm:
 *  1. u = m, v = a, b = 0, c = 1
 *  2. While v != 1 and u != 0
 *   2.1. If u even
 *    2.1.1. u /= 2
 *    2.1.2. b = (b / 2) mod m
 *   2.2. Else if v even
 *    2.2.1. v /= 2
 *    2.2.2. c = (c / 2) mod m
 *   2.3. Else if u >= v
 *    2.3.1. u -= v
 *    2.3.2. b = (b - c) mod m
 *   2.4. Else (v > u)
 *    2.4.1. v -= u
 *    2.4.2. c = (c - b) mod m
 *  3. NO_INVERSE if u == 0
 *
 * Values are kept non-negative: before a subtraction that would go below
 * zero the modulus is added first.
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [in]   u  SP integer to use in calculation.
 * @param  [in]   v  SP integer to use in calculation.
 * @param  [in]   b  SP integer to use in calculation
 * @param  [out]  c  SP integer that is the inverse.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when no inverse.
 */
static int _sp_invmod_bin(const sp_int* a, const sp_int* m, sp_int* u,
    sp_int* v, sp_int* b, sp_int* c)
{
    int err = MP_OKAY;

    /* 1. u = m, v = a, b = 0, c = 1 */
    _sp_copy(m, u);
    if (a != v) {
        _sp_copy(a, v);
    }
    _sp_zero(b);
    _sp_set(c, 1);

    /* 2. While v != 1 and u != 0 */
    while (!sp_isone(v) && !sp_iszero(u)) {
        /* 2.1. If u even */
        if ((u->dp[0] & 1) == 0) {
            /* 2.1.1. u /= 2 */
            _sp_div_2(u, u);
            /* 2.1.2. b = (b / 2) mod m
             * Make b even first (add the odd modulus) so halving is exact. */
            if (sp_isodd(b)) {
                _sp_add_off(b, m, b, 0);
            }
            _sp_div_2(b, b);
        }
        /* 2.2. Else if v even */
        else if ((v->dp[0] & 1) == 0) {
            /* 2.2.1. v /= 2 */
            _sp_div_2(v, v);
            /* 2.2.2. c = (c / 2) mod m */
            if (sp_isodd(c)) {
                _sp_add_off(c, m, c, 0);
            }
            _sp_div_2(c, c);
        }
        /* 2.3. Else if u >= v */
        else if (_sp_cmp_abs(u, v) != MP_LT) {
            /* 2.3.1. u -= v */
            _sp_sub_off(u, v, u, 0);
            /* 2.3.2. b = (b - c) mod m
             * Add m first when b < c so the subtraction stays non-negative. */
            if (_sp_cmp_abs(b, c) == MP_LT) {
                _sp_add_off(b, m, b, 0);
            }
            _sp_sub_off(b, c, b, 0);
        }
        /* 2.4. Else (v > u) */
        else {
            /* 2.4.1. v -= u */
            _sp_sub_off(v, u, v, 0);
            /* 2.4.2. c = (c - b) mod m */
            if (_sp_cmp_abs(c, b) == MP_LT) {
                _sp_add_off(c, m, c, 0);
            }
            _sp_sub_off(c, b, c, 0);
        }
    }
    /* 3. NO_INVERSE if u == 0
     * gcd(a, m) != 1 - no modular inverse exists. */
    if (sp_iszero(u)) {
        err = MP_VAL;
    }

    return err;
}
#if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
    (!defined(NO_RSA) || !defined(NO_DH))
/* Calculates the multiplicative inverse in the field. r*a = x*m + 1
 * Extended Euclidean Algorithm. NOT constant time.
 *
 * Creates two new SP ints.
 *
 * Algorithm:
 *  1. x = m, y = a, b = 1, c = 0
 *  2. while x > 1
 *   2.1. d = x / y, r = x mod y
 *   2.2. c -= d * b
 *   2.3. x = y, y = r
 *   2.4. s = b, b = c, c = s
 *  3. If y != 0 then NO_INVERSE
 *  4. If c < 0 then c += m
 *  5. inv = c
 *
 * When WOLFSSL_SP_INT_NEGATIVE is not defined, sp_int has no sign so the
 * signs of b and c are tracked separately in local flags (bneg/cneg).
 *
 * @param  [in]   a    SP integer to find inverse of.
 * @param  [in]   m    SP integer this is the modulus.
 * @param  [in]   x    SP integer to use in calculation.
 * @param  [in]   y    SP integer to use in calculation.
 * @param  [in]   b    SP integer to use in calculation
 * @param  [in]   c    SP integer to use in calculation
 * @param  [out]  inv  SP integer that is the inverse.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when no inverse.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_invmod_div(const sp_int* a, const sp_int* m, sp_int* x,
    sp_int* y, sp_int* b, sp_int* c, sp_int* inv)
{
    int err = MP_OKAY;
    sp_int* s;
#ifndef WOLFSSL_SP_INT_NEGATIVE
    /* Sign flags for b and c (1 = negative) - sp_int is unsigned here. */
    int bneg = 0;
    int cneg = 0;
    int neg;
#endif
    /* Quotient (and product) temporary - one word larger than modulus. */
    DECL_SP_INT(d, m->used + 1);

    ALLOC_SP_INT(d, m->used + 1, err, NULL);
    if (err == MP_OKAY) {
        mp_init(d);
        /* 1. x = m, y = a, b = 1, c = 0 */
        if (a != y) {
            _sp_copy(a, y);
        }
        _sp_copy(m, x);
        _sp_set(b, 1);
        _sp_zero(c);
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* 2. while x > 1 */
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
        /* 2.1. d = x / y, r = x mod y (remainder written back into x) */
        err = sp_div(x, y, d, x);
        if (err == MP_OKAY) {
            /* 2.2. c -= d * b */
            if (sp_isone(d)) {
                /* c -= 1 * b */
                err = sp_sub(c, b, c);
            }
            else {
                /* d *= b */
                err = sp_mul(d, b, d);
                /* c -= d */
                if (err == MP_OKAY) {
                    err = sp_sub(c, d, c);
                }
            }
            /* 2.3. x = y, y = r (pointer swap - x already holds r) */
            s = y; y = x; x = s;
            /* 2.4. s = b, b = c, c = s */
            s = b; b = c; c = s;
        }
    }
    /* 3. If y != 0 then NO_INVERSE */
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
        err = MP_VAL;
    }
    /* 4. If c < 0 then c += m */
    if ((err == MP_OKAY) && sp_isneg(c)) {
        err = sp_add(c, m, c);
    }
    if (err == MP_OKAY) {
        /* 5. inv = c */
        err = sp_copy(c, inv);
    }
#else
    /* 2. while x > 1 */
    while ((err == MP_OKAY) && (!sp_isone(x)) && (!sp_iszero(x))) {
        /* 2.1. d = x / y, r = x mod y (remainder written back into x) */
        err = sp_div(x, y, d, x);
        if (err == MP_OKAY) {
            /* 2.2. c -= d * b, using magnitudes and sign flags. */
            if (sp_isone(d)) {
                /* c -= 1 * b */
                if ((bneg ^ cneg) == 1) {
                    /* c -= -b or -c -= b, therefore add. */
                    _sp_add_off(c, b, c, 0);
                }
                else if (_sp_cmp_abs(c, b) == MP_LT) {
                    /* |c| < |b| and same sign, reverse subtract and negate. */
                    _sp_sub_off(b, c, c, 0);
                    cneg = !cneg;
                }
                else {
                    /* |c| >= |b| */
                    _sp_sub_off(c, b, c, 0);
                }
            }
            else {
                /* d *= b */
                err = sp_mul(d, b, d);
                /* c -= d */
                if (err == MP_OKAY) {
                    if ((bneg ^ cneg) == 1) {
                        /* c -= -d or -c -= d, therefore add. */
                        _sp_add_off(c, d, c, 0);
                    }
                    else if (_sp_cmp_abs(c, d) == MP_LT) {
                        /* |c| < |d| and same sign, reverse subtract and negate.
                         */
                        _sp_sub_off(d, c, c, 0);
                        cneg = !cneg;
                    }
                    else {
                        _sp_sub_off(c, d, c, 0);
                    }
                }
            }
            /* 2.3. x = y, y = r (pointer swap - x already holds r) */
            s = y; y = x; x = s;
            /* 2.4. s = b, b = c, c = s (swap sign flags along with values) */
            s = b; b = c; c = s;
            neg = bneg; bneg = cneg; cneg = neg;
        }
    }
    /* 3. If y != 0 then NO_INVERSE */
    if ((err == MP_OKAY) && (!sp_iszero(y))) {
        err = MP_VAL;
    }
    /* 4. If c < 0 then c += m */
    if ((err == MP_OKAY) && cneg) {
        /* c = m - |c| */
        _sp_sub_off(m, c, c, 0);
    }
    if (err == MP_OKAY) {
        /* 5. inv = c */
        err = sp_copy(c, inv);
    }
#endif

    FREE_SP_INT(d, NULL);
    return err;
}
#endif
/* Calculates the multiplicative inverse in the field.
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
 *
 * r*a = x*m + 1
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [out]  r  SP integer to hold result. r cannot be m.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when m is even and a divides m evenly.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;
    sp_int* v = NULL;
    sp_int* b = NULL;
    DECL_SP_INT_ARRAY(t, m->used + 1, 3);
    DECL_SP_INT(c, 2 * m->used + 1);

    /* Allocate SP ints:
     *  - three temporaries one word larger than the modulus
     *  - c one word longer than twice the modulus used (holds the m*c
     *    product in the even-modulus fixup below)
     */
    ALLOC_SP_INT_ARRAY(t, m->used + 1, 3, err, NULL);
    ALLOC_SP_INT(c, 2 * m->used + 1, err, NULL);
    if (err == MP_OKAY) {
        u = t[0];
        v = t[1];
        b = t[2];
        /* c allocated separately and larger for even mod case. */
    }
    /* Initialize intermediate values with minimal sizes. */
    if (err == MP_OKAY) {
        err = sp_init_size(u, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(v, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(b, m->used + 1);
    }
    if (err == MP_OKAY) {
        err = sp_init_size(c, 2 * m->used + 1);
    }

    if (err == MP_OKAY) {
        const sp_int* mm = m;
        const sp_int* ma = a;
        int evenMod = 0;

        if (sp_iseven(m)) {
            /* Even modulus: compute with roles swapped and fix up after.
             * a^-1 mod m = m + ((1 - m*(m^-1 % a)) / a) */
            mm = a;
            ma = v;
            _sp_copy(a, u);
            err = sp_mod(m, a, v);
            /* v == 0 when a divides m evenly - no inverse. */
            if ((err == MP_OKAY) && sp_iszero(v)) {
                err = MP_VAL;
            }
            evenMod = 1;
        }

        if (err == MP_OKAY) {
            /* Calculate inverse. */
    #if !defined(WOLFSSL_SP_LOW_MEM) && !defined(WOLFSSL_SP_SMALL) && \
        (!defined(NO_RSA) || !defined(NO_DH))
            /* Extended Euclidean is used for large moduli. */
            if (sp_count_bits(mm) >= 1024) {
                err = _sp_invmod_div(ma, mm, u, v, b, c, c);
            }
            else
    #endif
            {
                err = _sp_invmod_bin(ma, mm, u, v, b, c);
            }
        }

        /* Fixup for even modulus. */
        if ((err == MP_OKAY) && evenMod) {
            /* Finish operation.
             *    a^-1 mod m = m + ((1 - m*c) / a)
             * => a^-1 mod m = m - ((m*c - 1) / a)
             */
            err = sp_mul(c, m, c);
            if (err == MP_OKAY) {
                _sp_sub_d(c, 1, c);
                err = sp_div(c, a, c, NULL);
            }
            if (err == MP_OKAY) {
                err = sp_sub(m, c, r);
            }
        }
        else if (err == MP_OKAY) {
            _sp_copy(c, r);
        }
    }

    FREE_SP_INT(c, NULL);
    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
/* Calculates the multiplicative inverse in the field.
 * Right-shift Algorithm or Extended Euclidean Algorithm. NOT constant time.
 *
 * r*a = x*m + 1
 *
 * @param  [in]   a  SP integer to find inverse of.
 * @param  [in]   m  SP integer this is the modulus.
 * @param  [out]  r  SP integer to hold result. r cannot be m.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a, m or r is NULL; a or m is zero; a and m are even or
 *          m is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_invmod(const sp_int* a, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. r == m disallowed as m is needed throughout. */
    if ((a == NULL) || (m == NULL) || (r == NULL) || (r == m)) {
        err = MP_VAL;
    }
    /* r holds intermediates up to twice the modulus size (even mod case). */
    if ((err == MP_OKAY) && (m->used * 2 > r->size)) {
        err = MP_VAL;
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Don't support negative modulus. */
    if ((err == MP_OKAY) && (m->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif

    if (err == MP_OKAY) {
        /* Ensure number is less than modulus.
         * Note: reduced value is placed in r and a re-pointed at it. */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            err = sp_mod(a, m, r);
            a = r;
        }
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
        /* Make 'a' positive */
        err = sp_add(m, a, r);
        a = r;
    }
#endif

    /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
    if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m))) {
        err = MP_VAL;
    }
    /* r*2*x != n*2*y + 1 for integer x,y - both even means no inverse. */
    if ((err == MP_OKAY) && sp_iseven(a) && sp_iseven(m)) {
        err = MP_VAL;
    }
    /* 1*1 = 0*m + 1 - the inverse of 1 is trivially 1. */
    if ((err == MP_OKAY) && sp_isone(a)) {
        _sp_set(r, 1);
    }
    else if (err == MP_OKAY) {
        err = _sp_invmod(a, m, r);
    }

    return err;
}
  11735. #endif /* WOLFSSL_SP_INVMOD */
  11736. #ifdef WOLFSSL_SP_INVMOD_MONT_CT
/* Number of entries to pre-compute.
 * Many pre-defined primes have multiple of 8 consecutive 1s.
 * P-256 modulus - 2 => 32x1, 31x0, 1x1, 96x0, 94x1, 1x0, 1x1.
 */
#define CT_INV_MOD_PRE_CNT      8

/* Calculates the multiplicative inverse in the field - constant time.
 *
 * Modulus (m) must be a prime and greater than 2.
 * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
 *
 * Algorithm:
 *  pre = pre-computed values, m = modulus, a = value to find inverse of,
 *  e = exponent
 *  Pre-calc:
 *   1. pre[0] = 2^0 * a mod m
 *   2. For i in 2..CT_INV_MOD_PRE_CNT
 *    2.1. pre[i-1] = ((pre[i-2] ^ 2) * a) mod m
 *  Calc inverse:
 *   1. e = m - 2
 *   2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
 *   3. t = pre[j-1]
 *   4. s = 0
 *   5. j = 0
 *   6. For i index of next top bit..0
 *    6.1. bit = e[i]
 *    6.2. j += bit
 *    6.3. s += 1
 *    6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
 *     6.4.1. s -= 1 - bit
 *     6.4.2. For s downto 1
 *      6.4.2.1. t = (t ^ 2) mod m
 *     6.4.3. s = 1 - bit
 *     6.4.4. t = (t * pre[j-1]) mod m
 *     6.4.5. j = 0
 *   7. For s downto 1
 *    7.1. t = (t ^ 2) mod m
 *   8. If j > 0 then r = (t * pre[j-1]) mod m
 *   9. Else r = t
 *
 * @param  [in]   a   SP integer, Montgomery form, to find inverse of.
 * @param  [in]   m   SP integer this is the modulus.
 * @param  [out]  r   SP integer to hold result.
 * @param  [in]   mp  SP integer digit that is the bottom digit of inv(-m).
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
    sp_int_digit mp)
{
    int err = MP_OKAY;
    int i;
    int j = 0;
    int s = 0;
    sp_int* t = NULL;
    sp_int* e = NULL;
    /* Table holds the CT_INV_MOD_PRE_CNT pre-computed values plus two extra
     * entries used as working value (t) and exponent (e). */
#ifndef WOLFSSL_SP_NO_MALLOC
    DECL_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
#else
    DECL_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2);
#endif

#ifndef WOLFSSL_SP_NO_MALLOC
    ALLOC_DYN_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err,
        NULL);
#else
    ALLOC_SP_INT_ARRAY(pre, m->used * 2 + 1, CT_INV_MOD_PRE_CNT + 2, err, NULL);
#endif
    if (err == MP_OKAY) {
        t = pre[CT_INV_MOD_PRE_CNT + 0];
        e = pre[CT_INV_MOD_PRE_CNT + 1];
        /* Space for sqr and mul result. */
        _sp_init_size(t, m->used * 2 + 1);
        /* e = mod - 2 */
        _sp_init_size(e, m->used + 1);

        /* Create pre-computation results: ((2^(1..8))-1).a. */
        _sp_init_size(pre[0], m->used * 2 + 1);
        /* 1. pre[0] = 2^0 * a mod m
         *    Start with 1.a = a.
         */
        _sp_copy(a, pre[0]);
        /* 2. For i in 2..CT_INV_MOD_PRE_CNT
         *    For rest of entries in table.
         */
        for (i = 1; (err == MP_OKAY) && (i < CT_INV_MOD_PRE_CNT); i++) {
            /* 2.1 pre[i] = ((pre[i-1] ^ 2) * a) mod m */
            /* Previous value ..1 -> ..10 */
            _sp_init_size(pre[i], m->used * 2 + 1);
            err = sp_sqr(pre[i-1], pre[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
            /* ..10 -> ..11 */
            if (err == MP_OKAY) {
                err = sp_mul(pre[i], a, pre[i]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(pre[i], m, mp);
            }
        }
    }

    if (err == MP_OKAY) {
        /* 1. e = m - 2 */
        _sp_sub_d(m, 2, e);
        /* 2. j = Count leading 1's up to CT_INV_MOD_PRE_CNT
         *    One or more of the top bits is 1 so count.
         */
        for (i = sp_count_bits(e)-2, j = 1; i >= 0; i--, j++) {
            if ((!sp_is_bit_set(e, (unsigned int)i)) ||
                    (j == CT_INV_MOD_PRE_CNT)) {
                break;
            }
        }
        /* 3. Set tmp to product of leading bits. */
        _sp_copy(pre[j-1], t);

        /* 4. s = 0 */
        s = 0;
        /* 5. j = 0 */
        j = 0;
        /* 6. For i index of next top bit..0
         *    Do remaining bits in exponent.
         */
        for (; (err == MP_OKAY) && (i >= 0); i--) {
            /* 6.1. bit = e[i] */
            int bit = sp_is_bit_set(e, (unsigned int)i);

            /* 6.2. j += bit
             *      Update count of consecutive 1 bits.
             */
            j += bit;
            /* 6.3. s += 1
             *      Update count of squares required.
             */
            s++;

            /* 6.4. if j == CT_INV_MOD_PRE_CNT or (bit == 0 and j > 0)
             *      Check if max 1 bits or 0 and have seen at least one 1 bit.
             */
            if ((j == CT_INV_MOD_PRE_CNT) || ((!bit) && (j > 0))) {
                /* 6.4.1. s -= 1 - bit */
                bit = 1 - bit;
                s -= bit;
                /* 6.4.2. For s downto 1
                 *        Do s squares.
                 */
                for (; (err == MP_OKAY) && (s > 0); s--) {
                    /* 6.4.2.1. t = (t ^ 2) mod m */
                    err = sp_sqr(t, t);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(t, m, mp);
                    }
                }
                /* 6.4.3. s = 1 - bit */
                s = bit;

                /* 6.4.4. t = (t * pre[j-1]) mod m */
                if (err == MP_OKAY) {
                    err = sp_mul(t, pre[j-1], t);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t, m, mp);
                }

                /* 6.4.5. j = 0
                 *        Reset number of 1 bits seen.
                 */
                j = 0;
            }
        }
    }
    if (err == MP_OKAY) {
        /* 7. For s downto 1
         *    Do s squares - total remaining. */
        for (; (err == MP_OKAY) && (s > 0); s--) {
            /* 7.1. t = (t ^ 2) mod m */
            err = sp_sqr(t, t);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t, m, mp);
            }
        }
    }
    if (err == MP_OKAY) {
        /* 8. If j > 0 then r = (t * pre[j-1]) mod m */
        if (j > 0) {
            err = sp_mul(t, pre[j-1], r);
            if (err == MP_OKAY) {
                err = _sp_mont_red(r, m, mp);
            }
        }
        /* 9. Else r = t */
        else {
            _sp_copy(t, r);
        }
    }

#ifndef WOLFSSL_SP_NO_MALLOC
    FREE_DYN_SP_INT_ARRAY(pre, NULL);
#else
    FREE_SP_INT_ARRAY(pre, NULL);
#endif
    return err;
}
  11933. /* Calculates the multiplicative inverse in the field - constant time.
  11934. *
  11935. * Modulus (m) must be a prime and greater than 2.
  11936. * For prime m, inv = a ^ (m-2) mod m as 1 = a ^ (m-1) mod m.
  11937. *
  11938. * @param [in] a SP integer, Montgomery form, to find inverse of.
  11939. * @param [in] m SP integer this is the modulus.
  11940. * @param [out] r SP integer to hold result.
  11941. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  11942. *
  11943. * @return MP_OKAY on success.
  11944. * @return MP_VAL when a, m or r is NULL; a is 0 or m is less than 3.
  11945. * @return MP_MEM when dynamic memory allocation fails.
  11946. */
  11947. int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r,
  11948. sp_int_digit mp)
  11949. {
  11950. int err = MP_OKAY;
  11951. /* Validate parameters. */
  11952. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  11953. err = MP_VAL;
  11954. }
  11955. /* Ensure m is not too big. */
  11956. else if (m->used * 2 >= SP_INT_DIGITS) {
  11957. err = MP_VAL;
  11958. }
  11959. /* check that r can hold the range of the modulus result */
  11960. else if (m->used > r->size) {
  11961. err = MP_VAL;
  11962. }
  11963. /* 0 != n*m + 1 (+ve m), r*a mod 0 is always 0 (never 1) */
  11964. if ((err == MP_OKAY) && (sp_iszero(a) || sp_iszero(m) ||
  11965. ((m->used == 1) && (m->dp[0] < 3)))) {
  11966. err = MP_VAL;
  11967. }
  11968. if (err == MP_OKAY) {
  11969. /* Do operation. */
  11970. err = _sp_invmod_mont_ct(a, m, r, mp);
  11971. }
  11972. return err;
  11973. }
  11974. #endif /* WOLFSSL_SP_INVMOD_MONT_CT */
  11975. /**************************
  11976. * Exponentiation functions
  11977. **************************/
  11978. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  11979. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \
  11980. defined(OPENSSL_ALL)
  11981. #ifndef WC_PROTECT_ENCRYPTED_MEM
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time.
 * Is constant time and can be cache attack resistant.
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  1. s = 0
 *  2. t[0] = b mod m.
 *  3. t[1] = t[0]
 *  4. For i in (bits-1)...0
 *   4.1. t[s] = t[s] ^ 2
 *   4.2. y = e[i]
 *   4.3  j = y & s
 *   4.4  s = s | y
 *   4.5. t[j] = t[j] * b
 *
 *  5. r = t[1]
 *
 * t[0] is a dummy accumulator used until the first 1 bit of the exponent is
 * seen (s == 0); from then on t[1] holds the real value. In the cache
 * resistant build, operands are selected by masking the two buffer addresses
 * with sp_off_on_addr instead of indexing, so the access pattern does not
 * depend on exponent bits.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    /* 1. s = 0 */
    int s = 0;
#ifdef WC_NO_CACHE_RESISTANT
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 2);
#else
    DECL_SP_INT_ARRAY(t, 2 * m->used + 1, 3);
#endif

    /* Allocate temporaries. */
#ifdef WC_NO_CACHE_RESISTANT
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 2, err, NULL);
#else
    /* Working SP int needed when cache resistant. */
    ALLOC_SP_INT_ARRAY(t, 2 * m->used + 1, 3, err, NULL);
#endif
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], 2 * m->used + 1);
        _sp_init_size(t[1], 2 * m->used + 1);
#ifndef WC_NO_CACHE_RESISTANT
        _sp_init_size(t[2], 2 * m->used + 1);
#endif

        /* 2. t[0] = b mod m
         * Ensure base is less than modulus - set fake working value to base.
         */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 3. t[1] = t[0]
         *    Set real working value to base.
         */
        _sp_copy(t[0], t[1]);

        /* 4. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
#ifdef WC_NO_CACHE_RESISTANT
            /* 4.1. t[s] = t[s] ^ 2 */
            err = sp_sqrmod(t[s], m, t[s]);
            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                /* 4.3. j = y & s */
                int j = y & s;
                /* 4.4  s = s | y */
                s |= y;
                /* 4.5. t[j] = t[j] * b */
                err = _sp_mulmod(t[j], b, m, t[j]);
            }
#else
            /* 4.1. t[s] = t[s] ^ 2
             * Masked address selection avoids a data-dependent load. */
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
                     t[2]);
            err = sp_sqrmod(t[2], m, t[2]);
            _sp_copy(t[2],
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])));

            if (err == MP_OKAY) {
                /* 4.2. y = e[i] */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1);
                /* 4.3. j = y & s */
                int j = y & s;
                /* 4.4  s = s | y */
                s |= y;

                /* 4.5. t[j] = t[j] * b */
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
                         t[2]);
                err = _sp_mulmod(t[2], b, m, t[2]);
                _sp_copy(t[2],
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
#endif
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 5. r = t[1] */
        _sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12107. #else
/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Process the exponent one bit at a time with base in Montgomery form.
 * Is constant time and cache attack resistant.
 *
 * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
 * Cryptographic Hardware and Embedded Systems, CHES 2002
 *
 * Algorithm:
 *  b: base, e: exponent, m: modulus, r: result, bits: #bits to use
 *  1. t[1] = b mod m.
 *  2. t[0] = 1
 *  3. For i in (bits-1)...0
 *   3.1. y = e[i]
 *   3.2. t[2] = t[0] * t[1]
 *   3.3. t[3] = t[y] ^ 2
 *   3.4. t[y] = t[3], t[y^1] = t[2]
 *  4. r = t[0]
 *
 * Both a multiply and a square are performed every iteration regardless of
 * the exponent bit; the bit only selects (via masked copies) where the
 * results land.
 *
 * @param  [in]   b     SP integer that is the base.
 * @param  [in]   e     SP integer that is the exponent.
 * @param  [in]   bits  Number of bits in exponent to use. May be greater than
 *                      count of bits in e.
 * @param  [in]   m     SP integer that is the modulus.
 * @param  [out]  r     SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            err = sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;

        /* 2. t[0] = 1 */
        _sp_set(t[0], 1);

        /* 3. For i in (bits-1)...0 */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 3.1. y = e[i] */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 3.2. t[2] = t[0] * t[1] */
            err = sp_mulmod(t[0], t[1], m, t[2]);
            /* 3.3. t[3] = t[y] ^ 2
             * Masked address selection avoids a data-dependent load. */
            if (err == MP_OKAY) {
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
                err = sp_sqrmod(t[3], m, t[3]);
            }
            /* 3.4. t[y] = t[3], t[y^1] = t[2] - constant-time double copy. */
            if (err == MP_OKAY) {
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
            }
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* 4. r = t[0] */
        err = sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12194. #endif /* WC_PROTECT_ENCRYPTED_MEM */
  12195. #endif
  12196. #if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  12197. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \
  12198. defined(OPENSSL_ALL)
  12199. #ifndef WC_NO_HARDEN
  12200. #if !defined(WC_NO_CACHE_RESISTANT)
  12201. #ifndef WC_PROTECT_ENCRYPTED_MEM
  12202. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12203. * Process the exponent one bit at a time with base in Montgomery form.
  12204. * Is constant time and cache attack resistant.
  12205. *
  12206. * Algorithm:
  12207. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12208. * 1. t[0] = b mod m.
  12209. * 2. s = 0
  12210. * 3. t[0] = ToMont(t[0])
  12211. * 4. t[1] = t[0]
  12212. * 5. bm = t[0]
  12213. * 6. For i in (bits-1)...0
  12214. * 6.1. t[s] = t[s] ^ 2
  12215. * 6.2. y = e[i]
  12216. * 6.3 j = y & s
  12217. * 6.4 s = s | y
  12218. * 6.5. t[j] = t[j] * bm
  12219. * 7. t[1] = FromMont(t[1])
  12220. * 8. r = t[1]
  12221. *
  12222. * @param [in] b SP integer that is the base.
  12223. * @param [in] e SP integer that is the exponent.
  12224. * @param [in] bits Number of bits in exponent to use. May be greater than
  12225. * count of bits in e.
  12226. * @param [in] m SP integer that is the modulus.
  12227. * @param [out] r SP integer to hold result.
  12228. *
  12229. * @return MP_OKAY on success.
  12230. * @return MP_MEM when dynamic memory allocation fails.
  12231. */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* Four temporaries, each large enough for an un-reduced product:
     * t[0]: fake working value, t[1]: real working value,
     * t[2]: base in Montgomery form, t[3]: scratch for sqr/mul results. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus: b mod m == 0, so the result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;
        /* 2. s = 0
         * s stays 0 until the first set exponent bit has been processed;
         * while s == 0 all updates land in the fake value t[0]. */
        int s = 0;
        sp_int_digit mp;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 3. t[0] = ToMont(t[0])
         * Convert base to Montgomery form - as fake working value.
         * t[1] temporarily holds the Montgomery normalizer here. */
        err = sp_mont_norm(t[1], m);
        if (err == MP_OKAY) {
            err = sp_mul(t[0], t[1], t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
        }
        if (err == MP_OKAY) {
            /* 4. t[1] = t[0]
             * Set real working value to base.
             */
            _sp_copy(t[0], t[1]);
            /* 5. bm = t[0]. Montgomery form of base kept in t[2]. */
            _sp_copy(t[0], t[2]);
        }

        /* 6. For i in (bits-1)...0
         * Loop over the full requested bit count so iteration count does not
         * depend on the exponent's actual bit length. */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 6.1. t[s] = t[s] ^ 2
             * Select t[0] or t[1] via masked address arithmetic
             * (sp_off_on_addr masks, defined elsewhere) instead of a
             * data-dependent branch or array index - cache attack resistant. */
            _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])),
                     t[3]);
            err = sp_sqr(t[3], t[3]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp);
            }
            /* Write the squared value back to whichever of t[0]/t[1] it
             * came from, using the same masked selection. */
            _sp_copy(t[3],
                     (sp_int*)(((size_t)t[0] & sp_off_on_addr[s^1]) +
                               ((size_t)t[1] & sp_off_on_addr[s  ])));

            if (err == MP_OKAY) {
                /* 6.2. y = e[i] */
                int y = (int)((e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1);
                /* 6.3 j = y & s
                 * j is 1 (update real value) only when this bit is set AND a
                 * set bit was seen previously; otherwise the multiply below
                 * is a dummy operation on the fake value. */
                int j = y & s;
                /* 6.4 s = s | y */
                s |= y;
                /* 6.5. t[j] = t[j] * bm
                 * Always perform the multiply so the operation sequence is
                 * independent of the exponent bits. */
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])),
                         t[3]);
                err = sp_mul(t[3], t[2], t[3]);
                if (err == MP_OKAY) {
                    err = _sp_mont_red(t[3], m, mp);
                }
                _sp_copy(t[3],
                         (sp_int*)(((size_t)t[0] & sp_off_on_addr[j^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[j  ])));
            }
        }
        if (err == MP_OKAY) {
            /* 7. t[1] = FromMont(t[1]) */
            err = _sp_mont_red(t[1], m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 8. r = t[1] */
        _sp_copy(t[1], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12332. #else
  12333. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12334. * Process the exponent one bit at a time with base in Montgomery form.
  12335. * Is constant time and cache attack resistant.
  12336. *
  12337. * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder",
  12338. * Cryptographic Hardware and Embedded Systems, CHES 2002
  12339. *
  12340. * Algorithm:
  12341. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12342. * 1. t[1] = b mod m.
  12343. * 2. t[0] = ToMont(1)
  12344. * 3. t[1] = ToMont(t[1])
  12345. * 4. For i in (bits-1)...0
  12346. * 4.1. y = e[i]
  12347. * 4.2. t[2] = t[0] * t[1]
  12348. * 4.3. t[3] = t[y] ^ 2
  12349. * 4.4. t[y] = t[3], t[y^1] = t[2]
  12350. * 5. t[0] = FromMont(t[0])
  12351. * 6. r = t[0]
  12352. *
  12353. * @param [in] b SP integer that is the base.
  12354. * @param [in] e SP integer that is the exponent.
  12355. * @param [in] bits Number of bits in exponent to use. May be greater than
  12356. * count of bits in e.
  12357. * @param [in] m SP integer that is the modulus.
  12358. * @param [out] r SP integer to hold result.
  12359. *
  12360. * @return MP_OKAY on success.
  12361. * @return MP_MEM when dynamic memory allocation fails.
  12362. */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* t[0], t[1]: the two Montgomery-ladder accumulators;
     * t[2]: multiply result, t[3]: square result. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4);

    /* Allocate temporaries. */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL);
    if (err == MP_OKAY) {
        /* Initialize temporaries. */
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);
        _sp_init_size(t[2], m->used * 2 + 1);
        _sp_init_size(t[3], m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus: b mod m == 0, so the result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into working variable. */
            err = sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int i;
        sp_int_digit mp;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 2. t[0] = ToMont(1)
         * Calculate 1 in Montgomery form (the Montgomery normalizer).
         */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 3. t[1] = ToMont(t[1])
             * Convert base to Montgomery form.
             */
            err = sp_mulmod(t[1], t[0], m, t[1]);
        }

        /* 4. For i in (bits-1)...0
         * Every iteration performs exactly one multiply and one square,
         * regardless of the exponent bit (ladder property). */
        for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) {
            /* 4.1. y = e[i] */
            int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;

            /* 4.2. t[2] = t[0] * t[1] */
            err = sp_mul(t[0], t[1], t[2]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[2], m, mp);
            }
            /* 4.3. t[3] = t[y] ^ 2
             * Choose t[0] or t[1] via masked address arithmetic
             * (sp_off_on_addr masks, defined elsewhere) instead of a
             * data-dependent index - cache attack resistant. */
            if (err == MP_OKAY) {
                _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) +
                                   ((size_t)t[1] & sp_off_on_addr[y  ])),
                         t[3]);
                err = sp_sqr(t[3], t[3]);
            }
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[3], m, mp);
            }
            /* 4.4. t[y] = t[3], t[y^1] = t[2]
             * Constant-time two-way copy (helper defined elsewhere). */
            if (err == MP_OKAY) {
                _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used);
            }
        }

        if (err == MP_OKAY) {
            /* 5. t[0] = FromMont(t[0]) */
            err = _sp_mont_red(t[0], m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 6. r = t[0] */
        err = sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12443. #endif /* WC_PROTECT_ENCRYPTED_MEM */
  12444. #else
  12445. #ifdef SP_ALLOC
  12446. #define SP_ALLOC_PREDEFINED
  12447. #endif
  12448. /* Always allocate large array of sp_ints unless defined WOLFSSL_SP_NO_MALLOC */
  12449. #define SP_ALLOC
  12450. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12451. * Creates a window of precalculated exponents with base in Montgomery form.
  12452. * Is constant time but NOT cache attack resistant.
  12453. *
  12454. * Algorithm:
  12455. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12456. * w: window size based on bits.
  12457. * 1. t[1] = b mod m.
  12458. * 2. t[0] = MontNorm(m) = ToMont(1)
  12459. * 3. t[1] = ToMont(t[1])
  12460. * 4. For i in 2..(2 ^ w) - 1
  12461. * 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2
  12462. * 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1]
  12463. * 5. cb = w * (bits / w)
  12464. * 5. tr = t[e / (2 ^ cb)]
  12465. * 6. For i in cb..w
  12466. * 6.1. y = e[(i-1)..(i-w)]
  12467. * 6.2. tr = tr ^ (2 * w)
  12468. * 6.3. tr = tr * t[y]
  12469. * 7. tr = FromMont(tr)
  12470. * 8. r = tr
  12471. *
  12472. * @param [in] b SP integer that is the base.
  12473. * @param [in] e SP integer that is the exponent.
  12474. * @param [in] bits Number of bits in exponent to use. May be greater than
  12475. * count of bits in e.
  12476. * @param [in] m SP integer that is the modulus.
  12477. * @param [out] r SP integer to hold result.
  12478. *
  12479. * @return MP_OKAY on success.
  12480. * @return MP_MEM when dynamic memory allocation fails.
  12481. */
static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits,
    const sp_int* m, sp_int* r)
{
    int i;
    int c;
    int y;
    int winBits;
    int preCnt;
    int err = MP_OKAY;
    int done = 0;
    sp_int_digit mask;
    sp_int* tr = NULL;
    /* Maximum window size is 6 bits: up to 2^6 table entries plus result. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 6) + 1);

    /* Window bits based on number of pre-calculations versus number of loop
     * calculations.
     * Exponents for RSA and DH will result in 6-bit windows.
     */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* An entry for each possible 0..2^winBits-1 value. */
    preCnt = 1 << winBits;
    /* Mask for calculating index into pre-computed table. */
    mask = preCnt - 1;

    /* Allocate sp_ints for:
     *  - pre-computation table
     *  - temporary result
     */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, preCnt + 1, err, NULL);
    if (err == MP_OKAY) {
        /* Set variable to use allocate memory. */
        tr = t[preCnt];

        /* Initialize all allocated. */
        for (i = 0; i < preCnt; i++) {
            _sp_init_size(t[i], m->used * 2 + 1);
        }
        _sp_init_size(tr, m->used * 2 + 1);

        /* 1. t[1] = b mod m. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[1]);
            /* Handle base == modulus: b mod m == 0, so the result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[1])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into entry of table to contain b^1. */
            _sp_copy(b, t[1]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        sp_int_digit mp;
        sp_int_digit n;

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* 2. t[0] = MontNorm(m) = ToMont(1) */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 3. t[1] = ToMont(t[1]) */
            err = sp_mul(t[1], t[0], t[1]);
        }
        if (err == MP_OKAY) {
            /* t[1] = t[1] mod m, temporary size has to be bigger than t[1]. */
            err = _sp_div(t[1], m, NULL, t[1], t[1]->used + 1);
        }

        /* 4. For i in 2..(2 ^ w) - 1
         * Fill table: even entries by squaring, odd entries by multiplying
         * the previous entry by the base. */
        for (i = 2; (i < preCnt) && (err == MP_OKAY); i++) {
            /* 4.1 if i[0] == 0 then t[i] = t[i/2] ^ 2 */
            if ((i & 1) == 0) {
                err = sp_sqr(t[i/2], t[i]);
            }
            /* 4.2 if i[0] == 1 then t[i] = t[i-1] * t[1] */
            else {
                err = sp_mul(t[i-1], t[1], t[i]);
            }
            /* Montgomery reduce square or multiplication result. */
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp);
            }
        }

        if (err == MP_OKAY) {
            /* 5. cb = w * (bits / w)
             * n holds the current exponent digit, c the number of unread
             * bits remaining in n (bits are consumed from the top). */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            /* Find top bit index in last word. */
            c = bits & (SP_WORD_SIZE - 1);
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            /* Use as many bits from top to make remaining a multiple of window
             * size.
             */
            if ((bits % winBits) != 0) {
                c -= bits % winBits;
            }
            else {
                c -= winBits;
            }

            /* 5. tr = t[e / (2 ^ cb)] */
            y = (int)(n >> c);
            n <<= SP_WORD_SIZE - c;
            /* 5. Copy table value for first window. */
            _sp_copy(t[y], tr);

            /* 6. For i in cb..w */
            for (; (i >= 0) || (c >= winBits); ) {
                int j;

                /* 6.1. y = e[(i-1)..(i-w)] */
                if (c == 0) {
                    /* Bits up to end of digit */
                    n = e->dp[i--];
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n <<= winBits;
                    c = SP_WORD_SIZE - winBits;
                }
                else if (c < winBits) {
                    /* Bits to end of digit and part of next */
                    y = (int)(n >> (SP_WORD_SIZE - winBits));
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }

                /* 6.2. tr = tr ^ (2 * w)
                 * Square winBits times to shift result up by one window. */
                for (j = 0; (j < winBits) && (err == MP_OKAY); j++) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                }

                /* 6.3. tr = tr * t[y] */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp);
                }
            }
        }

        if (err == MP_OKAY) {
            /* 7. tr = FromMont(tr) */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* 8. r = tr */
        _sp_copy(tr, r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  12653. #ifndef SP_ALLOC_PREDEFINED
  12654. #undef SP_ALLOC
  12655. #undef SP_ALLOC_PREDEFINED
  12656. #endif
  12657. #endif /* !WC_NO_CACHE_RESISTANT */
  12658. #endif /* !WC_NO_HARDEN */
  12659. /* w = Log2(SP_WORD_SIZE) - 1 */
  12660. #if SP_WORD_SIZE == 8
  12661. #define EXP2_WINSIZE 2
  12662. #elif SP_WORD_SIZE == 16
  12663. #define EXP2_WINSIZE 3
  12664. #elif SP_WORD_SIZE == 32
  12665. #define EXP2_WINSIZE 4
  12666. #elif SP_WORD_SIZE == 64
  12667. #define EXP2_WINSIZE 5
  12668. #else
  12669. #error "sp_exptmod_base_2: Unexpected SP_WORD_SIZE"
  12670. #endif
  12671. /* Mask is all bits in window set. */
  12672. #define EXP2_MASK ((1 << EXP2_WINSIZE) - 1)
  12673. /* Internal. Exponentiates 2 to the power of e modulo m into r: r = 2 ^ e mod m
  12674. * Is constant time and cache attack resistant.
  12675. *
 * Calculates a value to make mod operations constant time, except when
 * WC_NO_HARDEN is defined or the modulus fits in one word.
  12678. *
  12679. * Algorithm:
  12680. * b: base, e: exponent, m: modulus, r: result, bits: #bits to use
  12681. * w: window size based on #bits in word.
  12682. * 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
  12683. * else tr = 1
  12684. * 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
  12685. * else a = 0
  12686. * 3. cb = w * (bits / w)
  12687. * 4. y = e / (2 ^ cb)
  12688. * 5. tr = (tr * (2 ^ y) + a) mod m
  12689. * 6. For i in cb..w
  12690. * 6.1. y = e[(i-1)..(i-w)]
  12691. * 6.2. tr = tr ^ (2 * w)
  12692. * 6.3. tr = ((tr * (2 ^ y) + a) mod m
  12693. * 7. if Words(m) > 1 then tr = FromMont(tr)
  12694. * 8. r = tr
  12695. *
  12696. * @param [in] e SP integer that is the exponent.
 * @param [in]  digits  Number of digits in exponent to use. May be greater
 *                      than count of digits in e.
  12699. * @param [in] m SP integer that is the modulus.
  12700. * @param [out] r SP integer to hold result.
  12701. *
  12702. * @return MP_OKAY on success.
  12703. * @return MP_MEM when dynamic memory allocation fails.
  12704. */
static int _sp_exptmod_base_2(const sp_int* e, int digits, const sp_int* m,
    sp_int* r)
{
    int i = 0;
    int c = 0;
    int y;
    int err = MP_OKAY;
    sp_int_digit mp = 0;
    sp_int_digit n = 0;
#ifndef WC_NO_HARDEN
    /* a: constant-time add value, tr: temporary result. */
    sp_int* a = NULL;
    sp_int* tr = NULL;
    DECL_SP_INT_ARRAY(d, m->used * 2 + 1, 2);
#else
    DECL_SP_INT(tr, m->used * 2 + 1);
#endif
    /* Montgomery form is only used for multi-word moduli. */
    int useMont = (m->used > 1);

#if 0
    sp_print_int(2, "a");
    sp_print(e, "b");
    sp_print(m, "m");
#endif

#ifndef WC_NO_HARDEN
    /* Allocate sp_ints for:
     *  - constant time add value for mod operation
     *  - temporary result
     */
    ALLOC_SP_INT_ARRAY(d, m->used * 2 + 1, 2, err, NULL);
#else
    /* Allocate sp_int for temporary result. */
    ALLOC_SP_INT(tr, m->used * 2 + 1, err, NULL);
#endif
    if (err == MP_OKAY) {
#ifndef WC_NO_HARDEN
        a  = d[0];
        tr = d[1];

        _sp_init_size(a, m->used * 2 + 1);
#endif
        _sp_init_size(tr, m->used * 2 + 1);
    }

    if ((err == MP_OKAY) && useMont) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
    }
    if (err == MP_OKAY) {
        /* 1. if Words(m) > 1 then tr = MontNorm(m) = ToMont(1)
         *    else                 tr = 1
         */
        if (useMont) {
            /* Calculate Montgomery normalizer for modulus - 1 in Montgomery
             * form.
             */
            err = sp_mont_norm(tr, m);
        }
        else {
            /* For single word modulus don't use Montgomery form. */
            err = sp_set(tr, 1);
        }
    }
    /* 2. if Words(m) > 1 and HARDEN then a = m * (2 ^ (2^w))
     *    else                            a = 0
     * Adding a before each mod makes the intermediate value's size, and
     * hence the division's work, independent of the window value. */
#ifndef WC_NO_HARDEN
    if ((err == MP_OKAY) && useMont) {
        err = sp_mul_2d(m, 1 << EXP2_WINSIZE, a);
    }
#endif

    if (err == MP_OKAY) {
        /* 3. cb = w * (bits / w)
         * n holds the current exponent digit, c the number of unread bits
         * remaining in n (bits are consumed from the top). */
        i = digits - 1;
        n = e->dp[i--];
        c = SP_WORD_SIZE;
#if EXP2_WINSIZE != 1
        c -= (digits * SP_WORD_SIZE) % EXP2_WINSIZE;
        if (c != SP_WORD_SIZE) {
            /* 4. y = e / (2 ^ cb) */
            y = (int)(n >> c);
            n <<= SP_WORD_SIZE - c;
        }
        else
#endif
        {
            /* 4. y = e / (2 ^ cb) */
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
            n <<= EXP2_WINSIZE;
            c -= EXP2_WINSIZE;
        }
        /* 5. tr = (tr * (2 ^ y) + a) mod m
         * Multiplying by 2^y is a left shift - no table needed for base 2. */
        err = sp_mul_2d(tr, y, tr);
    }
#ifndef WC_NO_HARDEN
    if ((err == MP_OKAY) && useMont) {
        /* Add value to make mod operation constant time. */
        err = sp_add(tr, a, tr);
    }
#endif
    if (err == MP_OKAY) {
        err = sp_mod(tr, m, tr);
    }
    /* 6. For i in cb..w */
    for (; (err == MP_OKAY) && ((i >= 0) || (c >= EXP2_WINSIZE)); ) {
        int j;

        /* 6.1. y = e[(i-1)..(i-w)] */
        if (c == 0) {
            /* Bits from next digit. */
            n = e->dp[i--];
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
            n <<= EXP2_WINSIZE;
            c = SP_WORD_SIZE - EXP2_WINSIZE;
        }
#if (EXP2_WINSIZE != 1) && (EXP2_WINSIZE != 2) && (EXP2_WINSIZE != 4)
        /* Only needed when the window size does not divide the word size. */
        else if (c < EXP2_WINSIZE) {
            /* Bits to end of digit and part of next */
            y = (int)(n >> (SP_WORD_SIZE - EXP2_WINSIZE));
            n = e->dp[i--];
            c = EXP2_WINSIZE - c;
            y |= (int)(n >> (SP_WORD_SIZE - c));
            n <<= c;
            c = SP_WORD_SIZE - c;
        }
#endif
        else {
            /* Bits from middle of digit */
            y = (int)((n >> (SP_WORD_SIZE - EXP2_WINSIZE)) & EXP2_MASK);
            n <<= EXP2_WINSIZE;
            c -= EXP2_WINSIZE;
        }

        /* 6.2. tr = tr ^ (2 * w) */
        for (j = 0; (j < EXP2_WINSIZE) && (err == MP_OKAY); j++) {
            err = sp_sqr(tr, tr);
            if (err == MP_OKAY) {
                if (useMont) {
                    err = _sp_mont_red(tr, m, mp);
                }
                else {
                    err = sp_mod(tr, m, tr);
                }
            }
        }

        /* 6.3. tr = ((tr * (2 ^ y) + a) mod m */
        if (err == MP_OKAY) {
            err = sp_mul_2d(tr, y, tr);
        }
#ifndef WC_NO_HARDEN
        if ((err == MP_OKAY) && useMont) {
            /* Add value to make mod operation constant time. */
            err = sp_add(tr, a, tr);
        }
#endif
        if (err == MP_OKAY) {
            /* Reduce current result by modulus. */
            err = sp_mod(tr, m, tr);
        }
    }

    /* 7. if Words(m) > 1 then tr = FromMont(tr) */
    if ((err == MP_OKAY) && useMont) {
        err = _sp_mont_red(tr, m, mp);
        /* Reduction implementation returns number to range: 0..m-1. */
    }
    if (err == MP_OKAY) {
        /* 8. r = tr */
        _sp_copy(tr, r);
    }

#if 0
    sp_print(r, "rme");
#endif

#ifndef WC_NO_HARDEN
    FREE_SP_INT_ARRAY(d, NULL);
#else
    FREE_SP_INT(tr, m->used * 2 + 1);
#endif
    return err;
}
  12878. #endif
  12879. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  12880. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
  12881. defined(OPENSSL_ALL)
  12882. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  12883. *
  12884. * Error returned when parameters r == e or r == m and base >= modulus.
  12885. *
  12886. * @param [in] b SP integer that is the base.
  12887. * @param [in] e SP integer that is the exponent.
  12888. * @param [in] digits Number of digits in exponent to use. May be greater
  12889. * than count of digits in e.
  12890. * @param [in] m SP integer that is the modulus.
  12891. * @param [out] r SP integer to hold result.
  12892. *
  12893. * @return MP_OKAY on success.
  12894. * @return MP_VAL when b, e, m or r is NULL, digits is negative, or m <= 0 or
  12895. * e is negative.
  12896. * @return MP_MEM when dynamic memory allocation fails.
  12897. */
int sp_exptmod_ex(const sp_int* b, const sp_int* e, int digits, const sp_int* m,
    sp_int* r)
{
    int err = MP_OKAY;
    int done = 0;
    /* Bit counts used below to select fixed-size implementations.
     * NOTE(review): computed before the NULL validation - relies on
     * sp_count_bits tolerating NULL arguments; confirm in its definition. */
    int mBits = sp_count_bits(m);
    int bBits = sp_count_bits(b);
    int eBits = sp_count_bits(e);

    /* Validate parameters. */
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL) ||
            (digits < 0)) {
        err = MP_VAL;
    }
    /* Ensure m is not too big. */
    else if (m->used * 2 >= SP_INT_DIGITS) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(b, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    }
#endif

    /* Check for invalid modulus. */
    if ((err == MP_OKAY) && sp_iszero(m)) {
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    /* Check for unsupported negative values of exponent and modulus. */
    if ((err == MP_OKAY) && ((e->sign == MP_NEG) || (m->sign == MP_NEG))) {
        err = MP_VAL;
    }
#endif

    /* Check for degenerate cases: x mod 1 == 0 and b ^ 0 == 1. */
    if ((err == MP_OKAY) && sp_isone(m)) {
        _sp_set(r, 0);
        done = 1;
    }
    if ((!done) && (err == MP_OKAY) && sp_iszero(e)) {
        _sp_set(r, 1);
        done = 1;
    }

    /* Ensure base is less than modulus - reduce into r and use r as base. */
    if ((!done) && (err == MP_OKAY) && (_sp_cmp_abs(b, m) != MP_LT)) {
        /* r cannot alias e or m since it is about to be overwritten. */
        if ((r == e) || (r == m)) {
            err = MP_VAL;
        }
        if (err == MP_OKAY) {
            err = sp_mod(b, m, r);
        }
        if (err == MP_OKAY) {
            b = r;
        }
    }
    /* Check for degenerate case of base: 0 ^ e == 0 (e != 0 here). */
    if ((!done) && (err == MP_OKAY) && sp_iszero(b)) {
        _sp_set(r, 0);
        done = 1;
    }

    /* Ensure SP integers have space for intermediate values. */
    if ((!done) && (err == MP_OKAY) && (m->used * 2 >= r->size)) {
        err = MP_VAL;
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Use code optimized for specific sizes if possible */
#if (defined(WOLFSSL_SP_MATH) || defined(WOLFSSL_SP_MATH_ALL)) && \
    (defined(WOLFSSL_HAVE_SP_RSA) || defined(WOLFSSL_HAVE_SP_DH))
    #ifndef WOLFSSL_SP_NO_2048
        if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) &&
                (eBits <= 1024)) {
            err = sp_ModExp_1024((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) &&
                 (eBits <= 2048)) {
            err = sp_ModExp_2048((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
    #ifndef WOLFSSL_SP_NO_3072
        if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) &&
                (eBits <= 1536)) {
            err = sp_ModExp_1536((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) &&
                 (eBits <= 3072)) {
            err = sp_ModExp_3072((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
    #ifdef WOLFSSL_SP_4096
        if ((mBits == 4096) && sp_isodd(m) && (bBits <= 4096) &&
                (eBits <= 4096)) {
            err = sp_ModExp_4096((sp_int*)b, (sp_int*)e, (sp_int*)m, r);
            done = 1;
        }
        else
    #endif
#endif
        {
            /* SP does not support size. */
        }
    }
#if defined(WOLFSSL_SP_MATH_ALL) || !defined(NO_DH) || defined(OPENSSL_ALL)
#if (defined(WOLFSSL_RSA_VERIFY_ONLY) || defined(WOLFSSL_RSA_PUBLIC_ONLY)) && \
    defined(NO_DH)
    if ((!done) && (err == MP_OKAY)) {
        /* Use non-constant time version - fastest. */
        err = sp_exptmod_nct(b, e, m, r);
    }
#else
#if defined(WOLFSSL_SP_MATH_ALL) || defined(OPENSSL_ALL)
    if ((!done) && (err == MP_OKAY) && (b->used == 1) && (b->dp[0] == 2) &&
         mp_isodd(m)) {
        /* Use the generic base 2 implementation. */
        err = _sp_exptmod_base_2(e, digits, m, r);
    }
    else if ((!done) && (err == MP_OKAY) && ((m->used > 1) && mp_isodd(m))) {
    #ifndef WC_NO_HARDEN
        /* Use constant time version hardened against timing attacks and
         * cache attacks when WC_NO_CACHE_RESISTANT not defined. */
        err = _sp_exptmod_mont_ex(b, e, digits * SP_WORD_SIZE, m, r);
    #else
        /* Use non-constant time version - fastest. */
        err = sp_exptmod_nct(b, e, m, r);
    #endif
    }
    else
#endif /* WOLFSSL_SP_MATH_ALL || OPENSSL_ALL */
    if ((!done) && (err == MP_OKAY)) {
        /* Otherwise use the generic implementation hardened against
         * timing and cache attacks. */
        err = _sp_exptmod_ex(b, e, digits * SP_WORD_SIZE, m, r);
    }
#endif /* WOLFSSL_RSA_VERIFY_ONLY || WOLFSSL_RSA_PUBLIC_ONLY */
#else
    if ((!done) && (err == MP_OKAY)) {
        err = MP_VAL;
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */

    /* Not all build configurations use all of these. */
    (void)mBits;
    (void)bBits;
    (void)eBits;
    (void)digits;

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rme");
    }
#endif
    return err;
}
  13052. #endif
  13053. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  13054. !defined(NO_DH) || (!defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)) || \
  13055. defined(OPENSSL_ALL)
  13056. /* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13057. *
  13058. * @param [in] b SP integer that is the base.
  13059. * @param [in] e SP integer that is the exponent.
  13060. * @param [in] m SP integer that is the modulus.
  13061. * @param [out] r SP integer to hold result.
  13062. *
  13063. * @return MP_OKAY on success.
  13064. * @return MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
  13065. * @return MP_MEM when dynamic memory allocation fails.
  13066. */
  13067. int sp_exptmod(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
  13068. {
  13069. int err = MP_OKAY;
  13070. /* Validate parameters. */
  13071. if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
  13072. err = MP_VAL;
  13073. }
  13074. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  13075. if (err == MP_OKAY) {
  13076. err = sp_exptmod_ex(b, e, (int)e->used, m, r);
  13077. }
  13078. RESTORE_VECTOR_REGISTERS();
  13079. return err;
  13080. }
  13081. #endif
  13082. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH)
  13083. #if defined(WOLFSSL_SP_FAST_NCT_EXPTMOD) || !defined(WOLFSSL_SP_SMALL)
  13084. /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
  13085. * Creates a window of precalculated exponents with base in Montgomery form.
  13086. * Sliding window and is NOT constant time.
  13087. *
  13088. * n-bit window is: (b^(2^(n-1))*b^0)...(b^(2^(n-1))*b^(2^(n-1)-1))
  13089. * e.g. when n=6, b^32..b^63
  13090. * Algorithm:
  13091. * 1. Ensure base is less than modulus.
  13092. * 2. Convert base to Montgomery form
  13093. * 3. Set result to table entry for top window bits, or
 *    if less than window bits in exponent, 1 in Montgomery form.
  13095. * 4. While at least window bits left:
 * 4.1. Count number of and skip leading 0 bits unless less than window bits
  13097. * left.
  13098. * 4.2. Montgomery square result for each leading 0 and window bits if bits
  13099. * left.
  13100. * 4.3. Break if less than window bits left.
 * 4.4. Get top window bits from exponent and drop.
  13102. * 4.5. Montgomery multiply result by table entry.
  13103. * 5. While bits left:
 *   5.1. Montgomery square result
  13105. * 5.2. If exponent bit set
  13106. * 5.2.1. Montgomery multiply result by Montgomery form of base.
  13107. * 6. Convert result back from Montgomery form.
  13108. *
  13109. * @param [in] b SP integer that is the base.
  13110. * @param [in] e SP integer that is the exponent.
  13111. * @param [in] bits Number of bits in exponent to use. May be greater than
  13112. * count of bits in e.
  13113. * @param [in] m SP integer that is the modulus.
  13114. * @param [out] r SP integer to hold result.
  13115. *
  13116. * @return MP_OKAY on success.
  13117. * @return MP_MEM when dynamic memory allocation fails.
  13118. */
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
    sp_int* r)
{
    int i = 0;
    int bits;
    int winBits;
    int preCnt;
    int err = MP_OKAY;
    int done = 0;
    sp_int* tr = NULL;   /* Temporary result (Montgomery form). */
    sp_int* bm = NULL;   /* Base in Montgomery form. */
    /* Maximum winBits is 6 and preCnt is (1 << (winBits - 1)). */
#ifndef WOLFSSL_SP_NO_MALLOC
    DECL_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#else
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, (1 << 5) + 2);
#endif

    bits = sp_count_bits(e);

    /* Window bits based on number of pre-calculations versus number of loop
     * calculations.
     * Exponents for RSA and DH will result in 6-bit windows.
     * Note: for 4096-bit values, 7-bit window is slightly better.
     */
    if (bits > 450) {
        winBits = 6;
    }
    else if (bits <= 21) {
        winBits = 1;
    }
    else if (bits <= 36) {
        winBits = 3;
    }
    else if (bits <= 140) {
        winBits = 4;
    }
    else {
        winBits = 5;
    }
    /* Top bit of exponent fixed as 1 for pre-calculated window, so only half
     * of the 2^winBits entries are needed. */
    preCnt = 1 << (winBits - 1);

    /* Allocate sp_ints for:
     *  - pre-computation table (preCnt entries)
     *  - temporary result
     *  - Montgomery form of base
     */
#ifndef WOLFSSL_SP_NO_MALLOC
    ALLOC_DYN_SP_INT_ARRAY(t, m->used * 2 + 1, (size_t)preCnt + 2, err, NULL);
#else
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, (size_t)preCnt + 2, err, NULL);
#endif
    if (err == MP_OKAY) {
        /* Set variables to use allocated memory. */
        tr = t[preCnt + 0];
        bm = t[preCnt + 1];

        /* Initialize all allocated sp_ints. */
        for (i = 0; i < preCnt; i++) {
            _sp_init_size(t[i], m->used * 2 + 1);
        }
        _sp_init_size(tr, m->used * 2 + 1);
        _sp_init_size(bm, m->used * 2 + 1);

        /* 1. Ensure base is less than modulus. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, bm);
            /* Handle base == modulus: result is 0. */
            if ((err == MP_OKAY) && sp_iszero(bm)) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into Montgomery base variable. */
            _sp_copy(b, bm);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        int y = 0;           /* Index into pre-computed table. */
        int c = 0;           /* Count of unconsumed bits in digit buffer n. */
        sp_int_digit mp;     /* Montgomery multiplier: -1/m[0] mod 2^word. */

        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* Calculate Montgomery normalizer for modulus. */
        err = sp_mont_norm(t[0], m);
        if (err == MP_OKAY) {
            /* 2. Convert base to Montgomery form. */
            err = sp_mul(bm, t[0], bm);
        }
        if (err == MP_OKAY) {
            /* bm = bm mod m, temporary size has to be bigger than bm->used. */
            err = _sp_div(bm, m, NULL, bm, bm->used + 1);
        }
        if (err == MP_OKAY) {
            /* Copy Montgomery form of base into first element of table. */
            _sp_copy(bm, t[0]);
        }
        /* Calculate b^(2^(winBits-1)) - table's smallest entry has top window
         * bit set. */
        for (i = 1; (i < winBits) && (err == MP_OKAY); i++) {
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp);
            }
        }
        /* For each table entry after first. */
        for (i = 1; (i < preCnt) && (err == MP_OKAY); i++) {
            /* Multiply previous entry by the base in Mont form into table. */
            err = sp_mul(t[i-1], bm, t[i]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[i], m, mp);
            }
        }

        /* 3. Set result to table entry for top window bits, or
         * if less than window bits in exponent, 1 in Montgomery form.
         */
        if (err == MP_OKAY) {
            sp_int_digit n;      /* Digit buffer; consumed from the top bit. */
            /* Mask for calculating index into pre-computed table. */
            sp_int_digit mask = (sp_int_digit)preCnt - 1;

            /* Find the top bit. */
            i = (bits - 1) >> SP_WORD_SHIFT;
            n = e->dp[i--];
            c = bits % SP_WORD_SIZE;
            if (c == 0) {
                c = SP_WORD_SIZE;
            }
            /* Put top bit at highest offset in digit. */
            n <<= SP_WORD_SIZE - c;

            if (bits >= winBits) {
                /* Top bit set. Copy from window. */
                if (c < winBits) {
                    /* Bits to end of digit and part of next. */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n = e->dp[i--];
                    c = winBits - c;
                    y |= (int)(n >> (SP_WORD_SIZE - c));
                    n <<= c;
                    c = SP_WORD_SIZE - c;
                }
                else {
                    /* Bits from middle of digit. */
                    y = (int)((n >> (SP_WORD_SIZE - winBits)) & mask);
                    n <<= winBits;
                    c -= winBits;
                }
                _sp_copy(t[y], tr);
            }
            else {
                /* 1 in Montgomery form. */
                err = sp_mont_norm(tr, m);
            }

            /* 4. While at least window bits left. */
            while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
                /* Number of squares to do first due to top bits being 0. */
                int sqrs = 0;

                /* 4.1. Count number of and skip leading 0 bits unless less
                 * than window bits remain.
                 */
                do {
                    /* Make sure n has bits from the right digit. */
                    if (c == 0) {
                        n = e->dp[i--];
                        c = SP_WORD_SIZE;
                    }
                    /* Mask off the next bit. */
                    if ((n & ((sp_int_digit)1 << (SP_WORD_SIZE - 1))) != 0) {
                        break;
                    }

                    /* Another square needed. */
                    sqrs++;
                    /* Skip bit. */
                    n <<= 1;
                    c--;
                }
                while ((err == MP_OKAY) && ((i >= 0) || (c >= winBits)));

                if ((err == MP_OKAY) && ((i >= 0) || (c >= winBits))) {
                    /* Add squares needed before using table entry. */
                    sqrs += winBits;
                }

                /* 4.2. Montgomery square result for each leading 0 and window
                 * bits if bits left.
                 */
                for (; (err == MP_OKAY) && (sqrs > 0); sqrs--) {
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }
                }

                /* 4.3. Break if less than window bits left. */
                if ((err == MP_OKAY) && (i < 0) && (c < winBits)) {
                    break;
                }

                /* 4.4. Get top window bits from exponent and drop. */
                if (err == MP_OKAY) {
                    if (c == 0) {
                        /* Bits from next digit. */
                        n = e->dp[i--];
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c = SP_WORD_SIZE - winBits;
                    }
                    else if (c < winBits) {
                        /* Bits to end of digit and part of next. */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n = e->dp[i--];
                        c = winBits - c;
                        y |= (int)(n >> (SP_WORD_SIZE - c));
                        n <<= c;
                        c = SP_WORD_SIZE - c;
                    }
                    else {
                        /* Bits from middle of digit. */
                        y = (int)(n >> (SP_WORD_SIZE - winBits));
                        n <<= winBits;
                        c -= winBits;
                    }
                    /* Top bit of window implied by table construction. */
                    y &= (int)mask;
                }

                /* 4.5. Montgomery multiply result by table entry. */
                if (err == MP_OKAY) {
                    err = sp_mul(tr, t[y], tr);
                }
                if (err == MP_OKAY) {
                    err = _sp_mont_red(tr, m, mp);
                }
            }

            /* Finished multiplying in table entries. */
            if ((err == MP_OKAY) && (c > 0)) {
                /* Handle remaining bits one at a time.
                 * Window values have top bit set and can't be used. */
                n = e->dp[0];
                /* 5. While bits left: */
                for (--c; (err == MP_OKAY) && (c >= 0); c--) {
                    /* 5.1. Montgomery square result. */
                    err = sp_sqr(tr, tr);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(tr, m, mp);
                    }

                    /* 5.2. If exponent bit set */
                    if ((err == MP_OKAY) && ((n >> c) & 1)) {
                        /* 5.2.1. Montgomery multiply result by Montgomery form
                         * of base.
                         */
                        err = sp_mul(tr, bm, tr);
                        if (err == MP_OKAY) {
                            err = _sp_mont_red(tr, m, mp);
                        }
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* 6. Convert result back from Montgomery form. */
            err = _sp_mont_red(tr, m, mp);
            /* Reduction implementation returns number to range: 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* Copy temporary result into parameter. */
        _sp_copy(tr, r);
    }

#ifndef WOLFSSL_SP_NO_MALLOC
    FREE_DYN_SP_INT_ARRAY(t, NULL);
#else
    FREE_SP_INT_ARRAY(t, NULL);
#endif
    return err;
}
  13384. #else
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Non-constant time implementation.
 *
 * Algorithm:
 *  1. Convert base to Montgomery form
 *  2. Set result to base (assumes exponent is not zero)
 *  3. For each bit in exponent starting at second highest
 *   3.1. Montgomery square result
 *   3.2. If exponent bit set
 *    3.2.1. Montgomery multiply result by Montgomery form of base.
 *  4. Convert result back from Montgomery form.
 *
 * @param  [in]   b  SP integer that is the base.
 * @param  [in]   e  SP integer that is the exponent.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m,
    sp_int* r)
{
    int i;
    int err = MP_OKAY;
    int done = 0;
    int y = 0;
    int bits = sp_count_bits(e);
    sp_int_digit mp;    /* Montgomery multiplier: -1/m[0] mod 2^word. */
    DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 2);

    /* Allocate memory for:
     *  - t[0]: running result (starts as Montgomery form of base)
     *  - t[1]: Montgomery form of base
     * (Temporaries also used so r can alias another parameter.) */
    ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        _sp_init_size(t[0], m->used * 2 + 1);
        _sp_init_size(t[1], m->used * 2 + 1);

        /* Ensure base is less than modulus and copy into temp. */
        if (_sp_cmp_abs(b, m) != MP_LT) {
            err = sp_mod(b, m, t[0]);
            /* Handle base == modulus: result is 0. */
            if ((err == MP_OKAY) && sp_iszero(t[0])) {
                _sp_set(r, 0);
                done = 1;
            }
        }
        else {
            /* Copy base into temp. */
            _sp_copy(b, t[0]);
        }
    }

    if ((!done) && (err == MP_OKAY)) {
        /* Calculate Montgomery multiplier for reduction. */
        _sp_mont_setup(m, &mp);
        /* Calculate Montgomery normalizer for modulus. */
        err = sp_mont_norm(t[1], m);
        if (err == MP_OKAY) {
            /* 1. Convert base to Montgomery form. */
            err = sp_mul(t[0], t[1], t[0]);
        }
        if (err == MP_OKAY) {
            /* t[0] = t[0] mod m, temporary size has to be bigger than t[0]. */
            err = _sp_div(t[0], m, NULL, t[0], t[0]->used + 1);
        }
        if (err == MP_OKAY) {
            /* 2. Result starts as Montgomery form of base (assuming e > 0). */
            _sp_copy(t[0], t[1]);
        }

        /* 3. For each bit in exponent starting at second highest. */
        for (i = bits - 2; (err == MP_OKAY) && (i >= 0); i--) {
            /* 3.1. Montgomery square result. */
            err = sp_sqr(t[0], t[0]);
            if (err == MP_OKAY) {
                err = _sp_mont_red(t[0], m, mp);
            }
            if (err == MP_OKAY) {
                /* Get bit at index i. */
                y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1;
                /* 3.2. If exponent bit set */
                if (y != 0) {
                    /* 3.2.1. Montgomery multiply result by Mont of base. */
                    err = sp_mul(t[0], t[1], t[0]);
                    if (err == MP_OKAY) {
                        err = _sp_mont_red(t[0], m, mp);
                    }
                }
            }
        }

        if (err == MP_OKAY) {
            /* 4. Convert from Montgomery form. */
            err = _sp_mont_red(t[0], m, mp);
            /* Reduction implementation returns number of range 0..m-1. */
        }
    }
    if ((!done) && (err == MP_OKAY)) {
        /* Copy temporary result into parameter. */
        _sp_copy(t[0], r);
    }

    FREE_SP_INT_ARRAY(t, NULL);
    return err;
}
  13487. #endif /* WOLFSSL_SP_FAST_NCT_EXPTMOD || !WOLFSSL_SP_SMALL */
/* Exponentiates b to the power of e modulo m into r: r = b ^ e mod m
 * Non-constant time implementation.
 *
 * Validates parameters, dispatches special cases, then calls the
 * non-constant-time implementation (or the generic one for even moduli,
 * which Montgomery reduction cannot handle).
 *
 * @param  [in]   b  SP integer that is the base.
 * @param  [in]   e  SP integer that is the exponent.
 * @param  [in]   m  SP integer that is the modulus.
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when b, e, m or r is NULL; or m <= 0 or e is negative.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_exptmod_nct(const sp_int* b, const sp_int* e, const sp_int* m, sp_int* r)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((b == NULL) || (e == NULL) || (m == NULL) || (r == NULL)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(b, "a");
        sp_print(e, "b");
        sp_print(m, "m");
    }
#endif

    /* Empty branch keeps the special-case chain below from being entered
     * when validation failed. */
    if (err != MP_OKAY) {
    }
    /* Handle special cases. */
    else if (sp_iszero(m)) {
        err = MP_VAL;
    }
#ifdef WOLFSSL_SP_INT_NEGATIVE
    else if ((e->sign == MP_NEG) || (m->sign == MP_NEG)) {
        err = MP_VAL;
    }
#endif
    /* x mod 1 is always 0. */
    else if (sp_isone(m)) {
        _sp_set(r, 0);
    }
    /* b^0 mod m = 1 mod m = 1. */
    else if (sp_iszero(e)) {
        _sp_set(r, 1);
    }
    /* 0^x mod m = 0 mod m = 0. */
    else if (sp_iszero(b)) {
        _sp_set(r, 0);
    }
    /* Ensure SP integers have space for intermediate values. */
    else if (m->used * 2 >= r->size) {
        err = MP_VAL;
    }
#if !defined(WOLFSSL_RSA_VERIFY_ONLY) && !defined(WOLFSSL_RSA_PUBLIC_ONLY)
    /* Montgomery reduction requires an odd modulus; fall back to the
     * generic implementation for even m. */
    else if (mp_iseven(m)) {
        err = _sp_exptmod_ex(b, e, (int)(e->used * SP_WORD_SIZE), m, r);
    }
#endif
    else {
        err = _sp_exptmod_nct(b, e, m, r);
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rme");
    }
#endif
    return err;
}
  13556. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH */
  13557. /***************
  13558. * 2^e functions
  13559. ***************/
  13560. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
  13561. /* Divide by 2^e: r = a >> e and rem = bits shifted out
  13562. *
  13563. * @param [in] a SP integer to divide.
  13564. * @param [in] e Exponent bits (dividing by 2^e).
  13565. * @param [in] m SP integer that is the modulus.
  13566. * @param [out] r SP integer to hold result.
  13567. * @param [out] rem SP integer to hold remainder.
  13568. *
  13569. * @return MP_OKAY on success.
  13570. * @return MP_VAL when a is NULL or e is negative.
  13571. */
  13572. int sp_div_2d(const sp_int* a, int e, sp_int* r, sp_int* rem)
  13573. {
  13574. int err = MP_OKAY;
  13575. if ((a == NULL) || (e < 0)) {
  13576. err = MP_VAL;
  13577. }
  13578. if (err == MP_OKAY) {
  13579. /* Number of bits remaining after shift. */
  13580. int remBits = sp_count_bits(a) - e;
  13581. if (remBits <= 0) {
  13582. /* Shifting down by more bits than in number. */
  13583. _sp_zero(r);
  13584. if (rem != NULL) {
  13585. err = sp_copy(a, rem);
  13586. }
  13587. }
  13588. else {
  13589. if (rem != NULL) {
  13590. /* Copy a in to remainder. */
  13591. err = sp_copy(a, rem);
  13592. }
  13593. if (err == MP_OKAY) {
  13594. /* Shift a down by into result. */
  13595. err = sp_rshb(a, e, r);
  13596. }
  13597. if ((err == MP_OKAY) && (rem != NULL)) {
  13598. /* Set used and mask off top digit of remainder. */
  13599. rem->used = ((unsigned int)e + SP_WORD_SIZE - 1) >>
  13600. SP_WORD_SHIFT;
  13601. e &= SP_WORD_MASK;
  13602. if (e > 0) {
  13603. rem->dp[rem->used - 1] &= ((sp_int_digit)1 << e) - 1;
  13604. }
  13605. /* Remove leading zeros from remainder. */
  13606. sp_clamp(rem);
  13607. #ifdef WOLFSSL_SP_INT_NEGATIVE
  13608. rem->sign = MP_ZPOS;
  13609. #endif
  13610. }
  13611. }
  13612. }
  13613. return err;
  13614. }
  13615. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  13616. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)
/* The bottom e bits: r = a & ((1 << e) - 1)
 *
 * @param  [in]   a  SP integer to reduce.
 * @param  [in]   e  Modulus bits (modulus equals 2^e).
 * @param  [out]  r  SP integer to hold result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, e is negative or e is too large for
 *          result.
 */
int sp_mod_2d(const sp_int* a, int e, sp_int* r)
{
    int err = MP_OKAY;
    /* Number of digits needed to hold e bits (rounded up). */
    unsigned int digits = ((unsigned int)e + SP_WORD_SIZE - 1) >> SP_WORD_SHIFT;

    if ((a == NULL) || (r == NULL) || (e < 0)) {
        err = MP_VAL;
    }
    /* Result must have space for all masked digits. */
    if ((err == MP_OKAY) && (digits > r->size)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Copy a into r if not same pointer.
         * NOTE(review): copies 'digits' words from a->dp even when
         * digits > a->used - presumably dp is zero-padded up to size;
         * confirm against sp_int allocation/init behavior. */
        if (a != r) {
            XMEMCPY(r->dp, a->dp, digits * SP_WORD_SIZEOF);
            r->used = a->used;
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            r->sign = a->sign;
        #endif
        }

        /* Modify result if a is bigger or same digit size. */
#ifndef WOLFSSL_SP_INT_NEGATIVE
        if (digits <= a->used)
#else
        /* Need to make negative positive and mask. */
        if ((a->sign == MP_NEG) || (digits <= a->used))
#endif
        {
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                unsigned int i;
                sp_int_digit carry = 0;

                /* Negate value (two's complement: 0 - value), propagating
                 * a borrow once any non-zero digit has been seen. */
                for (i = 0; i < r->used; i++) {
                    sp_int_digit next = r->dp[i] > 0;
                    r->dp[i] = (sp_int_digit)0 - r->dp[i] - carry;
                    carry |= next;
                }
                /* Sign-extend the borrow into remaining digits. */
                for (; i < digits; i++) {
                    r->dp[i] = (sp_int_digit)0 - carry;
                }
                r->sign = MP_ZPOS;
            }
        #endif
            /* Set used and mask off top digit of result. */
            r->used = digits;
            e &= SP_WORD_MASK;
            if (e > 0) {
                r->dp[r->used - 1] &= ((sp_int_digit)1 << e) - 1;
            }
            sp_clamp(r);
        }
    }

    return err;
}
  13681. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  13682. #if (defined(WOLFSSL_SP_MATH_ALL) && (!defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  13683. !defined(NO_DH))) || defined(OPENSSL_ALL)
  13684. /* Multiply by 2^e: r = a << e
  13685. *
  13686. * @param [in] a SP integer to multiply.
  13687. * @param [in] e Multiplier bits (multiplier equals 2^e).
  13688. * @param [out] r SP integer to hold result.
  13689. *
  13690. * @return MP_OKAY on success.
  13691. * @return MP_VAL when a or r is NULL, e is negative, or result is too big for
  13692. * result size.
  13693. */
  13694. int sp_mul_2d(const sp_int* a, int e, sp_int* r)
  13695. {
  13696. int err = MP_OKAY;
  13697. /* Validate parameters. */
  13698. if ((a == NULL) || (r == NULL) || (e < 0)) {
  13699. err = MP_VAL;
  13700. }
  13701. /* Ensure result has enough allocated digits for result. */
  13702. if ((err == MP_OKAY) &&
  13703. ((unsigned int)(sp_count_bits(a) + e) > r->size * SP_WORD_SIZE)) {
  13704. err = MP_VAL;
  13705. }
  13706. if (err == MP_OKAY) {
  13707. /* Copy a into r as left shift function works on the number. */
  13708. if (a != r) {
  13709. err = sp_copy(a, r);
  13710. }
  13711. }
  13712. if (err == MP_OKAY) {
  13713. #if 0
  13714. sp_print(a, "a");
  13715. sp_print_int(e, "n");
  13716. #endif
  13717. err = sp_lshb(r, e);
  13718. #if 0
  13719. sp_print(r, "rsl");
  13720. #endif
  13721. }
  13722. return err;
  13723. }
  13724. #endif /* WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY */
  13725. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  13726. defined(HAVE_ECC) || (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY))
  13727. /* START SP_SQR implementations */
  13728. /* This code is generated.
  13729. * To generate:
  13730. * cd scripts/sp/sp_int
  13731. * ./gen.sh
  13732. * File sp_sqr.c contains code.
  13733. */
  13734. #if !defined(WOLFSSL_SP_MATH) || !defined(WOLFSSL_SP_SMALL)
  13735. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba-style implementation using assembly multiply/accumulate macros.
 * Off-diagonal products are added twice (SP_ASM_MUL_ADD2); diagonal squares
 * once (SP_ASM_SQR_ADD). l/h/o form a rolling three-word accumulator.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* VLA: scratch for the low half of the result. */
    sp_int_digit t[((a->used + 1) / 2) * 2 + 1];
#else
    sp_int_digit t[(SP_INT_DIGITS + 1) / 2];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(
        sizeof(sp_int_digit) * (((a->used + 1) / 2) * 2 + 1), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if ((err == MP_OKAY) && (a->used <= 1)) {
        /* Single digit: one square gives the whole result. */
        sp_int_digit l;
        sp_int_digit h;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        r->dp[0] = h;
        r->dp[1] = l;
    }
    else if (err == MP_OKAY) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o;
        /* Low-half result digits go into t; once k reaches the high half, p
         * is switched to write directly into r->dp. */
        sp_int_digit* p = t;

        h = 0;
        l = 0;
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        o = 0;
        /* First half of result columns: all index pairs are in range. */
        for (k = 1; k < (a->used + 1) / 2; k++) {
            i = k;
            j = (int)(k - 1);
            /* Column 2k-1: products a[i]*a[j] with i+j == 2k-1, added twice. */
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            /* Column 2k: diagonal square plus doubled off-diagonal pairs. */
            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = (int)(k - 1);
            for (; (j >= 0); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            t[k * 2] = l;
            l = h;
            h = o;
            o = 0;
        }
        /* Second half of result columns: i is bounded by a->used. */
        for (; k < a->used; k++) {
            i = k;
            j = (int)(k - 1);
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            p[k * 2 - 1] = l;
            l = h;
            h = o;
            o = 0;

            SP_ASM_SQR_ADD(l, h, o, a->dp[k]);
            i = k + 1;
            j = (int)(k - 1);
            for (; (i < a->used); i++, j--) {
                SP_ASM_MUL_ADD2(l, h, o, a->dp[i], a->dp[j]);
            }
            p[k * 2] = l;
            l = h;
            h = o;
            o = 0;
            /* From here on write straight into the result. */
            p = r->dp;
        }
        r->dp[k * 2 - 1] = l;
        /* Copy the buffered low half into the result. */
        XMEMCPY(r->dp, t, (((a->used + 1) / 2) * 2 + 1) * sizeof(sp_int_digit));
    }
    if (err == MP_OKAY) {
        r->used = a->used * 2;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  13843. #else /* !SQR_MUL_ASM */
/* Square a and store in r. r = a * a
 *
 * Portable word-based implementation: accumulates each result column in the
 * double-width l/h (and o when SP_WORD_OVERFLOW carry handling is needed).
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    int j;
    unsigned int k;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#elif defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
    !defined(WOLFSSL_SP_NO_DYN_STACK)
    /* VLA scratch for the full result (r may alias a). */
    sp_int_digit t[a->used * 2];
#else
    sp_int_digit t[SP_INT_DIGITS];
#endif

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * (a->used * 2), NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        sp_int_word w;   /* Double-width product of two digits. */
        sp_int_word l;   /* Low accumulator word for current column. */
        sp_int_word h;   /* High accumulator word for current column. */
    #ifdef SP_WORD_OVERFLOW
        sp_int_word o;   /* Overflow accumulator beyond h. */
    #endif

        /* Column 0: only a[0]^2 contributes. */
        w = (sp_int_word)a->dp[0] * a->dp[0];
        t[0] = (sp_int_digit)w;
        l = (sp_int_digit)(w >> SP_WORD_SIZE);
        h = 0;
    #ifdef SP_WORD_OVERFLOW
        o = 0;
    #endif
        /* Columns 1 .. 2*(used-1): sum products a[i]*a[j] with i+j == k. */
        for (k = 1; k <= (a->used - 1) * 2; k++) {
            i = k / 2;
            j = (int)(k - i);
            /* Diagonal square contributes once (only when k is even). */
            if (i == (unsigned int)j) {
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
            #ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
            #endif
            }
            /* Off-diagonal products contribute twice (a[i]*a[j] == a[j]*a[i]). */
            for (++i, --j; (i < a->used) && (j >= 0); i++, j--) {
                w = (sp_int_word)a->dp[i] * a->dp[j];
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
            #ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
            #endif
                /* Second addition doubles the off-diagonal product. */
                l += (sp_int_digit)w;
                h += (sp_int_digit)(w >> SP_WORD_SIZE);
            #ifdef SP_WORD_OVERFLOW
                h += (sp_int_digit)(l >> SP_WORD_SIZE);
                l &= SP_MASK;
                o += (sp_int_digit)(h >> SP_WORD_SIZE);
                h &= SP_MASK;
            #endif
            }
            /* Store column digit and shift accumulator down one word. */
            t[k] = (sp_int_digit)l;
            l >>= SP_WORD_SIZE;
            l += (sp_int_digit)h;
            h >>= SP_WORD_SIZE;
        #ifdef SP_WORD_OVERFLOW
            h += o & SP_MASK;
            o >>= SP_WORD_SIZE;
        #endif
        }
        /* Final carry becomes the top digit. */
        t[k] = (sp_int_digit)l;
        r->used = k + 1;
        XMEMCPY(r->dp, t, r->used * sizeof(sp_int_digit));
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  13941. #endif /* SQR_MUL_ASM */
  13942. #endif /* !WOLFSSL_SP_MATH || !WOLFSSL_SP_SMALL */
  13943. #ifndef WOLFSSL_SP_SMALL
  13944. #if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
  13945. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
  13946. #ifndef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Long-hand implementation for 4-digit numbers: all 10 distinct digit
 * products are computed up front, then each result digit is assembled by
 * summing the relevant product words (off-diagonal products added twice).
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_4(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_word* w = NULL;
#else
    sp_int_word w[10];
#endif
    const sp_int_digit* da = a->dp;

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    w = (sp_int_word*)XMALLOC(sizeof(sp_int_word) * 10, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (w == NULL) {
        err = MP_MEM;
    }
#endif

    if (err == MP_OKAY) {
        /* All distinct products a[i]*a[j], i <= j. w[0] doubles as the
         * running accumulator after its own low word is stored. */
        w[0] = (sp_int_word)da[0] * da[0];
        w[1] = (sp_int_word)da[0] * da[1];
        w[2] = (sp_int_word)da[0] * da[2];
        w[3] = (sp_int_word)da[1] * da[1];
        w[4] = (sp_int_word)da[0] * da[3];
        w[5] = (sp_int_word)da[1] * da[2];
        w[6] = (sp_int_word)da[1] * da[3];
        w[7] = (sp_int_word)da[2] * da[2];
        w[8] = (sp_int_word)da[2] * da[3];
        w[9] = (sp_int_word)da[3] * da[3];

        /* Digit 0: a0*a0 low word. */
        r->dp[0] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 1: carry + 2*(a0*a1) low. */
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        r->dp[1] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 2: carry + 2*(a0*a1) high + 2*(a0*a2) low + a1*a1 low. */
        w[1] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[1];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[3];
        r->dp[2] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 3: carry + highs of column 2 + 2*(a0*a3 + a1*a2) low. */
        w[2] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[2];
        w[0] += (sp_int_digit)w[2];
        w[3] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[3];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        r->dp[3] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 4: carry + highs of column 3 + 2*(a1*a3) low + a2*a2 low. */
        w[4] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[4];
        w[0] += (sp_int_digit)w[4];
        w[5] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[5];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[7];
        r->dp[4] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 5: carry + highs of column 4 + 2*(a2*a3) low. */
        w[6] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[6];
        w[0] += (sp_int_digit)w[6];
        w[7] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[7];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        r->dp[5] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 6: carry + 2*(a2*a3) high + a3*a3 low. */
        w[8] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[8];
        w[0] += (sp_int_digit)w[9];
        r->dp[6] = (sp_int_digit)w[0];
        w[0] >>= SP_WORD_SIZE;
        /* Digit 7: carry + a3*a3 high. */
        w[9] >>= SP_WORD_SIZE;
        w[0] += (sp_int_digit)w[9];
        r->dp[7] = (sp_int_digit)w[0];

        r->used = 8;
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (w != NULL) {
        XFREE(w, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  14048. #else /* SQR_MUL_ASM */
/* Square a and store in r. r = a * a
 *
 * Comba implementation for 4-digit numbers using assembly macros.
 * The low four digits are staged in t[] so r may alias a; the high digits
 * are written straight into r once a's low digits are no longer needed.
 * l/h/o form a rolling three-word column accumulator.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_4(const sp_int* a, sp_int* r)
{
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    sp_int_digit t[4];

    /* Column 0: a0^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a0*a1. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a0*a2 + a1^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*a0*a3 + 2*a1*a2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*a1*a3 + a2^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    r->dp[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: 2*a2*a3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[3]);
    r->dp[5] = l;
    l = h;
    h = o;
    /* Columns 6 and 7: a3^2 plus final carry. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[3]);
    r->dp[6] = l;
    r->dp[7] = h;
    /* Copy staged low digits into the result. */
    XMEMCPY(r->dp, t, 4 * sizeof(sp_int_digit));
    r->used = 8;
    sp_clamp(r);
    return MP_OKAY;
}
  14103. #endif /* SQR_MUL_ASM */
  14104. #endif /* SP_WORD_SIZE == 64 */
  14105. #if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
  14106. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba implementation: the result is built one output word ("column") at a
 * time; for column k every product a[i]*a[j] with i+j == k is accumulated,
 * cross products (i != j) being added doubled.
 *
 * @param  [in]  a  SP integer to square. a->dp[0..5] are read
 *                  unconditionally.
 * @param  [out] r  SP integer result. May be the same sp_int as a; the low
 *                  half is staged locally until all input digits are read.
 *
 * @return  MP_OKAY on success. This fixed-size variant performs no dynamic
 *          allocation, so it cannot fail.
 */
static int _sp_sqr_6(const sp_int* a, sp_int* r)
{
    /* Rolling three-word column accumulator (low/high/overflow); shifted
     * down one word (l=h; h=o; o=0) after each column is emitted. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator: cross products are summed once into tl/th/to
     * and then added doubled via SP_ASM_ADD_DBL_3, halving multiply count. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Staging buffer for the low 6 result words (lets r alias a). */
    sp_int_digit t[6];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): Thumb-specific pre-clear - presumably the Thumb variant
     * of SP_ASM_MUL_SET reads 'to' before writing it; confirm in sp_int.h. */
    to = 0;
#endif
    /* Column 0: a[0]^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*(a[0]*a[3] + a[1]*a[2]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*(a[0]*a[4] + a[1]*a[3]) + a[2]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: sum a[0]*a[5] + a[1]*a[4] + a[2]*a[3] once, then add it
     * doubled into the column accumulator. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: 2*(a[1]*a[5] + a[2]*a[4]) + a[3]^2. From here on results go
     * straight into r->dp[] - above every input digit still to be read. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    r->dp[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7: 2*(a[2]*a[5] + a[3]*a[4]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[4]);
    r->dp[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: 2*a[3]*a[5] + a[4]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9: 2*a[4]*a[5]. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[5]);
    r->dp[9] = l;
    l = h;
    h = o;
    /* Column 10: a[5]^2 - top column, no further carry possible. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[5]);
    r->dp[10] = l;
    r->dp[11] = h;
    /* All of a consumed: commit the staged low half. */
    XMEMCPY(r->dp, t, 6 * sizeof(sp_int_digit));
    r->used = 12;
    /* Trim leading zero words from the fixed 12-word result. */
    sp_clamp(r);
    return MP_OKAY;
}
  14195. #endif /* SQR_MUL_ASM */
  14196. #endif /* SP_WORD_SIZE == 64 */
  14197. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
  14198. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba implementation: the result is built one output word ("column") at a
 * time; for column k every product a[i]*a[j] with i+j == k is accumulated,
 * cross products (i != j) being added doubled.
 *
 * @param  [in]  a  SP integer to square. a->dp[0..7] are read
 *                  unconditionally.
 * @param  [out] r  SP integer result. May be the same sp_int as a; the low
 *                  half is staged locally until all input digits are read.
 *
 * @return  MP_OKAY on success. This fixed-size variant performs no dynamic
 *          allocation, so it cannot fail.
 */
static int _sp_sqr_8(const sp_int* a, sp_int* r)
{
    /* Rolling three-word column accumulator (low/high/overflow); shifted
     * down one word (l=h; h=o; o=0) after each column is emitted. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator: cross products are summed once into tl/th/to
     * and then added doubled via SP_ASM_ADD_DBL_3, halving multiply count. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Staging buffer for the low 8 result words (lets r alias a). */
    sp_int_digit t[8];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): Thumb-specific pre-clear - presumably the Thumb variant
     * of SP_ASM_MUL_SET reads 'to' before writing it; confirm in sp_int.h. */
    to = 0;
#endif
    /* Column 0: a[0]^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2: 2*a[0]*a[2] + a[1]^2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3: 2*(a[0]*a[3] + a[1]*a[2]). */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4: 2*(a[0]*a[4] + a[1]*a[3]) + a[2]^2. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: cross products summed once into tl/th/to, added doubled. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6: doubled cross products plus the square term a[3]^2 (added
     * undoubled, directly into l/h/o). */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8: from here on results go straight into r->dp[] - above every
     * input digit still to be read. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10: few enough terms to double each product individually. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    r->dp[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[6]);
    r->dp[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[6], a->dp[7]);
    r->dp[13] = l;
    l = h;
    h = o;
    /* Column 14: a[7]^2 - top column, no further carry possible. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[7]);
    r->dp[14] = l;
    r->dp[15] = h;
    /* All of a consumed: commit the staged low half. */
    XMEMCPY(r->dp, t, 8 * sizeof(sp_int_digit));
    r->used = 16;
    /* Trim leading zero words from the fixed 16-word result. */
    sp_clamp(r);
    return MP_OKAY;
}
  14322. #endif /* SQR_MUL_ASM */
  14323. #endif /* SP_WORD_SIZE == 32 */
  14324. #if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
  14325. #ifdef SQR_MUL_ASM
/* Square a and store in r. r = a * a
 *
 * Comba implementation: the result is built one output word ("column") at a
 * time; for column k every product a[i]*a[j] with i+j == k is accumulated,
 * cross products (i != j) being added doubled.
 *
 * @param  [in]  a  SP integer to square. a->dp[0..11] are read
 *                  unconditionally.
 * @param  [out] r  SP integer result. May be the same sp_int as a; the low
 *                  half is staged locally until all input digits are read.
 *
 * @return  MP_OKAY on success. This fixed-size variant performs no dynamic
 *          allocation, so it cannot fail.
 */
static int _sp_sqr_12(const sp_int* a, sp_int* r)
{
    /* Rolling three-word column accumulator (low/high/overflow); shifted
     * down one word (l=h; h=o; o=0) after each column is emitted. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator: cross products are summed once into tl/th/to
     * and then added doubled via SP_ASM_ADD_DBL_3, halving multiply count.
     * SP_ASM_SQR_ADD terms (the a[i]^2 on even columns) bypass tl/th/to and
     * are added undoubled straight into l/h/o. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Staging buffer for the low 12 result words (lets r alias a). */
    sp_int_digit t[12];

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): Thumb-specific pre-clear - presumably the Thumb variant
     * of SP_ASM_MUL_SET reads 'to' before writing it; confirm in sp_int.h. */
    to = 0;
#endif
    /* Column 0: a[0]^2. */
    SP_ASM_SQR(h, l, a->dp[0]);
    t[0] = h;
    h = 0;
    /* Column 1: 2*a[0]*a[1]. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
    t[1] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 2. */
    SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
    t[2] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 3. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
    t[3] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 4. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
    t[4] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 5: from here the cross products are summed once into tl/th/to
     * and added doubled in one go. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[5] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 6. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[6] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 7. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[7] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 8. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[8] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 9. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[9] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 10. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[10] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 11: widest column - six distinct cross products. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    t[11] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 12: from here on results go straight into r->dp[] - above
     * every input digit still to be read. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[12] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 13. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[13] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 14. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[14] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 15. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[15] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 16. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[16] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 17. */
    SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[11]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
    SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
    SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
    r->dp[17] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 18: narrow enough to double each product individually again. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[7], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[10]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
    r->dp[18] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 19. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[8], a->dp[11]);
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[10]);
    r->dp[19] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 20. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[9], a->dp[11]);
    SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
    r->dp[20] = l;
    l = h;
    h = o;
    o = 0;
    /* Column 21. */
    SP_ASM_MUL_ADD2(l, h, o, a->dp[10], a->dp[11]);
    r->dp[21] = l;
    l = h;
    h = o;
    /* Column 22: a[11]^2 - top column, no further carry possible. */
    SP_ASM_SQR_ADD_NO(l, h, a->dp[11]);
    r->dp[22] = l;
    r->dp[23] = h;
    /* All of a consumed: commit the staged low half. */
    XMEMCPY(r->dp, t, 12 * sizeof(sp_int_digit));
    r->used = 24;
    /* Trim leading zero words from the fixed 24-word result. */
    sp_clamp(r);
    return MP_OKAY;
}
  14531. #endif /* SQR_MUL_ASM */
  14532. #endif /* SP_WORD_SIZE == 32 */
  14533. #endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
  14534. #if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
  14535. (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
  14536. (SP_WORD_SIZE == 64)))
  14537. #if SP_INT_DIGITS >= 32
/* Square a and store in r. r = a * a
 *
 * Comba implementation: the result is built one output word ("column") at a
 * time; for column k every product a[i]*a[j] with i+j == k is accumulated,
 * cross products (i != j) being added doubled.
 *
 * @param  [in]  a  SP integer to square. a->dp[0..15] are read
 *                  unconditionally.
 * @param  [out] r  SP integer result. May be the same sp_int as a; the low
 *                  half is staged locally until all input digits are read.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails (only possible in
 *          WOLFSSL_SMALL_STACK builds, where the staging buffer is heap
 *          allocated).
 */
static int _sp_sqr_16(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    /* Rolling three-word column accumulator (low/high/overflow); shifted
     * down one word (l=h; h=o; o=0) after each column is emitted. */
    sp_int_digit l = 0;
    sp_int_digit h = 0;
    sp_int_digit o = 0;
    /* Secondary accumulator: cross products are summed once into tl/th/to
     * and then added doubled via SP_ASM_ADD_DBL_3, halving multiply count. */
    sp_int_digit tl = 0;
    sp_int_digit th = 0;
    sp_int_digit to;
    /* Staging buffer for the low 16 result words (lets r alias a); heap
     * allocated in small-stack builds, automatic otherwise. */
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    sp_int_digit* t = NULL;
#else
    sp_int_digit t[16];
#endif

#if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
    /* NOTE(review): Thumb-specific pre-clear - presumably the Thumb variant
     * of SP_ASM_MUL_SET reads 'to' before writing it; confirm in sp_int.h. */
    to = 0;
#endif
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 16, NULL,
        DYNAMIC_TYPE_BIGINT);
    if (t == NULL) {
        err = MP_MEM;
    }
#endif
    if (err == MP_OKAY) {
        /* Column 0: a[0]^2. */
        SP_ASM_SQR(h, l, a->dp[0]);
        t[0] = h;
        h = 0;
        /* Column 1: 2*a[0]*a[1]. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
        t[1] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 2. */
        SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
        t[2] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 3. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
        t[3] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 4. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
        t[4] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 5: from here cross products are summed once into tl/th/to
         * and added doubled in one go; a[i]^2 terms (even columns) are added
         * undoubled straight into l/h/o. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[5] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 6. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[6] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 7. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[7] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 8. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[8] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 9. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[9] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 10. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[10] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 11. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[11] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 12. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[12] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 13. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[13] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 14. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[14] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 15: widest column - eight distinct cross products. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        t[15] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 16: from here on results go straight into r->dp[] - above
         * every input digit still to be read. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[16] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 17. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[17] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 18. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[18] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 19. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[19] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 20. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[20] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 21. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[21] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 22. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[22] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 23. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[23] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 24. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[24] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 25. */
        SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[15]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
        SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
        SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
        r->dp[25] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 26: narrow enough to double each product individually. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[11], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[14]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
        r->dp[26] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 27. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[12], a->dp[15]);
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[14]);
        r->dp[27] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 28. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[13], a->dp[15]);
        SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
        r->dp[28] = l;
        l = h;
        h = o;
        o = 0;
        /* Column 29. */
        SP_ASM_MUL_ADD2(l, h, o, a->dp[14], a->dp[15]);
        r->dp[29] = l;
        l = h;
        h = o;
        /* Column 30: a[15]^2 - top column, no further carry possible. */
        SP_ASM_SQR_ADD_NO(l, h, a->dp[15]);
        r->dp[30] = l;
        r->dp[31] = h;
        /* All of a consumed: commit the staged low half. */
        XMEMCPY(r->dp, t, 16 * sizeof(sp_int_digit));
        r->used = 32;
        /* Trim leading zero words from the fixed 32-word result. */
        sp_clamp(r);
    }

#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    if (t != NULL) {
        XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
    }
#endif
    return err;
}
  14860. #endif /* SP_INT_DIGITS >= 32 */
  14861. #endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
  14862. * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
  14863. #if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
  14864. #if SP_INT_DIGITS >= 48
  14865. /* Square a and store in r. r = a * a
  14866. *
  14867. * Comba implementation.
  14868. *
  14869. * @param [in] a SP integer to square.
  14870. * @param [out] r SP integer result.
  14871. *
  14872. * @return MP_OKAY on success.
  14873. * @return MP_MEM when dynamic memory allocation fails.
  14874. */
  14875. static int _sp_sqr_24(const sp_int* a, sp_int* r)
  14876. {
  14877. int err = MP_OKAY;
  14878. sp_int_digit l = 0;
  14879. sp_int_digit h = 0;
  14880. sp_int_digit o = 0;
  14881. sp_int_digit tl = 0;
  14882. sp_int_digit th = 0;
  14883. sp_int_digit to;
  14884. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  14885. sp_int_digit* t = NULL;
  14886. #else
  14887. sp_int_digit t[24];
  14888. #endif
  14889. #if defined(WOLFSSL_SP_ARM_THUMB) && SP_WORD_SIZE == 32
  14890. to = 0;
  14891. #endif
  14892. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  14893. t = (sp_int_digit*)XMALLOC(sizeof(sp_int_digit) * 24, NULL,
  14894. DYNAMIC_TYPE_BIGINT);
  14895. if (t == NULL) {
  14896. err = MP_MEM;
  14897. }
  14898. #endif
  14899. if (err == MP_OKAY) {
  14900. SP_ASM_SQR(h, l, a->dp[0]);
  14901. t[0] = h;
  14902. h = 0;
  14903. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[1]);
  14904. t[1] = l;
  14905. l = h;
  14906. h = o;
  14907. o = 0;
  14908. SP_ASM_MUL_ADD2_NO(l, h, o, a->dp[0], a->dp[2]);
  14909. SP_ASM_SQR_ADD(l, h, o, a->dp[1]);
  14910. t[2] = l;
  14911. l = h;
  14912. h = o;
  14913. o = 0;
  14914. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[3]);
  14915. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[2]);
  14916. t[3] = l;
  14917. l = h;
  14918. h = o;
  14919. o = 0;
  14920. SP_ASM_MUL_ADD2(l, h, o, a->dp[0], a->dp[4]);
  14921. SP_ASM_MUL_ADD2(l, h, o, a->dp[1], a->dp[3]);
  14922. SP_ASM_SQR_ADD(l, h, o, a->dp[2]);
  14923. t[4] = l;
  14924. l = h;
  14925. h = o;
  14926. o = 0;
  14927. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[5]);
  14928. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[4]);
  14929. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[3]);
  14930. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14931. t[5] = l;
  14932. l = h;
  14933. h = o;
  14934. o = 0;
  14935. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[6]);
  14936. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[5]);
  14937. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[4]);
  14938. SP_ASM_SQR_ADD(l, h, o, a->dp[3]);
  14939. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14940. t[6] = l;
  14941. l = h;
  14942. h = o;
  14943. o = 0;
  14944. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[7]);
  14945. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[6]);
  14946. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[5]);
  14947. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[4]);
  14948. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14949. t[7] = l;
  14950. l = h;
  14951. h = o;
  14952. o = 0;
  14953. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[8]);
  14954. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[7]);
  14955. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[6]);
  14956. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[5]);
  14957. SP_ASM_SQR_ADD(l, h, o, a->dp[4]);
  14958. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14959. t[8] = l;
  14960. l = h;
  14961. h = o;
  14962. o = 0;
  14963. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[9]);
  14964. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[8]);
  14965. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[7]);
  14966. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[6]);
  14967. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[5]);
  14968. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14969. t[9] = l;
  14970. l = h;
  14971. h = o;
  14972. o = 0;
  14973. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[10]);
  14974. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[9]);
  14975. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[8]);
  14976. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[7]);
  14977. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[6]);
  14978. SP_ASM_SQR_ADD(l, h, o, a->dp[5]);
  14979. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14980. t[10] = l;
  14981. l = h;
  14982. h = o;
  14983. o = 0;
  14984. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[11]);
  14985. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[10]);
  14986. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[9]);
  14987. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[8]);
  14988. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[7]);
  14989. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[6]);
  14990. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  14991. t[11] = l;
  14992. l = h;
  14993. h = o;
  14994. o = 0;
  14995. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[12]);
  14996. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[11]);
  14997. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[10]);
  14998. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[9]);
  14999. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[8]);
  15000. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[7]);
  15001. SP_ASM_SQR_ADD(l, h, o, a->dp[6]);
  15002. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15003. t[12] = l;
  15004. l = h;
  15005. h = o;
  15006. o = 0;
  15007. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[13]);
  15008. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[12]);
  15009. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[11]);
  15010. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[10]);
  15011. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[9]);
  15012. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[8]);
  15013. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[7]);
  15014. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15015. t[13] = l;
  15016. l = h;
  15017. h = o;
  15018. o = 0;
  15019. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[14]);
  15020. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[13]);
  15021. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[12]);
  15022. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[11]);
  15023. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[10]);
  15024. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[9]);
  15025. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[8]);
  15026. SP_ASM_SQR_ADD(l, h, o, a->dp[7]);
  15027. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15028. t[14] = l;
  15029. l = h;
  15030. h = o;
  15031. o = 0;
  15032. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[15]);
  15033. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[14]);
  15034. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[13]);
  15035. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[12]);
  15036. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[11]);
  15037. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[10]);
  15038. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[9]);
  15039. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[8]);
  15040. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15041. t[15] = l;
  15042. l = h;
  15043. h = o;
  15044. o = 0;
  15045. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[16]);
  15046. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[15]);
  15047. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[14]);
  15048. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[13]);
  15049. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[12]);
  15050. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[11]);
  15051. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[10]);
  15052. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[9]);
  15053. SP_ASM_SQR_ADD(l, h, o, a->dp[8]);
  15054. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15055. t[16] = l;
  15056. l = h;
  15057. h = o;
  15058. o = 0;
  15059. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[17]);
  15060. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[16]);
  15061. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[15]);
  15062. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[14]);
  15063. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[13]);
  15064. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[12]);
  15065. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[11]);
  15066. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[10]);
  15067. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[9]);
  15068. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15069. t[17] = l;
  15070. l = h;
  15071. h = o;
  15072. o = 0;
  15073. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[18]);
  15074. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[17]);
  15075. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[16]);
  15076. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[15]);
  15077. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[14]);
  15078. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[13]);
  15079. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[12]);
  15080. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[11]);
  15081. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[10]);
  15082. SP_ASM_SQR_ADD(l, h, o, a->dp[9]);
  15083. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15084. t[18] = l;
  15085. l = h;
  15086. h = o;
  15087. o = 0;
  15088. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[19]);
  15089. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[18]);
  15090. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[17]);
  15091. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[16]);
  15092. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[15]);
  15093. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[14]);
  15094. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[13]);
  15095. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[12]);
  15096. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[11]);
  15097. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[10]);
  15098. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15099. t[19] = l;
  15100. l = h;
  15101. h = o;
  15102. o = 0;
  15103. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[20]);
  15104. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[19]);
  15105. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[18]);
  15106. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[17]);
  15107. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[16]);
  15108. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[15]);
  15109. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[14]);
  15110. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[13]);
  15111. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[12]);
  15112. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[11]);
  15113. SP_ASM_SQR_ADD(l, h, o, a->dp[10]);
  15114. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15115. t[20] = l;
  15116. l = h;
  15117. h = o;
  15118. o = 0;
  15119. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[21]);
  15120. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[20]);
  15121. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[19]);
  15122. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[18]);
  15123. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[17]);
  15124. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[16]);
  15125. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[15]);
  15126. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[14]);
  15127. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[13]);
  15128. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[12]);
  15129. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[11]);
  15130. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15131. t[21] = l;
  15132. l = h;
  15133. h = o;
  15134. o = 0;
  15135. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[22]);
  15136. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[21]);
  15137. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[20]);
  15138. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[19]);
  15139. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[18]);
  15140. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[17]);
  15141. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[16]);
  15142. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[15]);
  15143. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[14]);
  15144. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[13]);
  15145. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[12]);
  15146. SP_ASM_SQR_ADD(l, h, o, a->dp[11]);
  15147. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15148. t[22] = l;
  15149. l = h;
  15150. h = o;
  15151. o = 0;
  15152. SP_ASM_MUL_SET(tl, th, to, a->dp[0], a->dp[23]);
  15153. SP_ASM_MUL_ADD(tl, th, to, a->dp[1], a->dp[22]);
  15154. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[21]);
  15155. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[20]);
  15156. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[19]);
  15157. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[18]);
  15158. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[17]);
  15159. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[16]);
  15160. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[15]);
  15161. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[14]);
  15162. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[13]);
  15163. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[12]);
  15164. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15165. t[23] = l;
  15166. l = h;
  15167. h = o;
  15168. o = 0;
  15169. SP_ASM_MUL_SET(tl, th, to, a->dp[1], a->dp[23]);
  15170. SP_ASM_MUL_ADD(tl, th, to, a->dp[2], a->dp[22]);
  15171. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[21]);
  15172. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[20]);
  15173. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[19]);
  15174. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[18]);
  15175. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[17]);
  15176. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[16]);
  15177. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[15]);
  15178. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[14]);
  15179. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[13]);
  15180. SP_ASM_SQR_ADD(l, h, o, a->dp[12]);
  15181. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15182. r->dp[24] = l;
  15183. l = h;
  15184. h = o;
  15185. o = 0;
  15186. SP_ASM_MUL_SET(tl, th, to, a->dp[2], a->dp[23]);
  15187. SP_ASM_MUL_ADD(tl, th, to, a->dp[3], a->dp[22]);
  15188. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[21]);
  15189. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[20]);
  15190. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[19]);
  15191. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[18]);
  15192. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[17]);
  15193. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[16]);
  15194. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[15]);
  15195. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[14]);
  15196. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[13]);
  15197. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15198. r->dp[25] = l;
  15199. l = h;
  15200. h = o;
  15201. o = 0;
  15202. SP_ASM_MUL_SET(tl, th, to, a->dp[3], a->dp[23]);
  15203. SP_ASM_MUL_ADD(tl, th, to, a->dp[4], a->dp[22]);
  15204. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[21]);
  15205. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[20]);
  15206. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[19]);
  15207. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[18]);
  15208. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[17]);
  15209. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[16]);
  15210. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[15]);
  15211. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[14]);
  15212. SP_ASM_SQR_ADD(l, h, o, a->dp[13]);
  15213. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15214. r->dp[26] = l;
  15215. l = h;
  15216. h = o;
  15217. o = 0;
  15218. SP_ASM_MUL_SET(tl, th, to, a->dp[4], a->dp[23]);
  15219. SP_ASM_MUL_ADD(tl, th, to, a->dp[5], a->dp[22]);
  15220. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[21]);
  15221. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[20]);
  15222. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[19]);
  15223. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[18]);
  15224. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[17]);
  15225. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[16]);
  15226. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[15]);
  15227. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[14]);
  15228. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15229. r->dp[27] = l;
  15230. l = h;
  15231. h = o;
  15232. o = 0;
  15233. SP_ASM_MUL_SET(tl, th, to, a->dp[5], a->dp[23]);
  15234. SP_ASM_MUL_ADD(tl, th, to, a->dp[6], a->dp[22]);
  15235. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[21]);
  15236. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[20]);
  15237. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[19]);
  15238. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[18]);
  15239. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[17]);
  15240. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[16]);
  15241. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[15]);
  15242. SP_ASM_SQR_ADD(l, h, o, a->dp[14]);
  15243. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15244. r->dp[28] = l;
  15245. l = h;
  15246. h = o;
  15247. o = 0;
  15248. SP_ASM_MUL_SET(tl, th, to, a->dp[6], a->dp[23]);
  15249. SP_ASM_MUL_ADD(tl, th, to, a->dp[7], a->dp[22]);
  15250. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[21]);
  15251. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[20]);
  15252. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[19]);
  15253. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[18]);
  15254. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[17]);
  15255. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[16]);
  15256. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[15]);
  15257. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15258. r->dp[29] = l;
  15259. l = h;
  15260. h = o;
  15261. o = 0;
  15262. SP_ASM_MUL_SET(tl, th, to, a->dp[7], a->dp[23]);
  15263. SP_ASM_MUL_ADD(tl, th, to, a->dp[8], a->dp[22]);
  15264. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[21]);
  15265. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[20]);
  15266. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[19]);
  15267. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[18]);
  15268. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[17]);
  15269. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[16]);
  15270. SP_ASM_SQR_ADD(l, h, o, a->dp[15]);
  15271. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15272. r->dp[30] = l;
  15273. l = h;
  15274. h = o;
  15275. o = 0;
  15276. SP_ASM_MUL_SET(tl, th, to, a->dp[8], a->dp[23]);
  15277. SP_ASM_MUL_ADD(tl, th, to, a->dp[9], a->dp[22]);
  15278. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[21]);
  15279. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[20]);
  15280. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[19]);
  15281. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[18]);
  15282. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[17]);
  15283. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[16]);
  15284. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15285. r->dp[31] = l;
  15286. l = h;
  15287. h = o;
  15288. o = 0;
  15289. SP_ASM_MUL_SET(tl, th, to, a->dp[9], a->dp[23]);
  15290. SP_ASM_MUL_ADD(tl, th, to, a->dp[10], a->dp[22]);
  15291. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[21]);
  15292. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[20]);
  15293. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[19]);
  15294. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[18]);
  15295. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[17]);
  15296. SP_ASM_SQR_ADD(l, h, o, a->dp[16]);
  15297. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15298. r->dp[32] = l;
  15299. l = h;
  15300. h = o;
  15301. o = 0;
  15302. SP_ASM_MUL_SET(tl, th, to, a->dp[10], a->dp[23]);
  15303. SP_ASM_MUL_ADD(tl, th, to, a->dp[11], a->dp[22]);
  15304. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[21]);
  15305. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[20]);
  15306. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[19]);
  15307. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[18]);
  15308. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[17]);
  15309. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15310. r->dp[33] = l;
  15311. l = h;
  15312. h = o;
  15313. o = 0;
  15314. SP_ASM_MUL_SET(tl, th, to, a->dp[11], a->dp[23]);
  15315. SP_ASM_MUL_ADD(tl, th, to, a->dp[12], a->dp[22]);
  15316. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[21]);
  15317. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[20]);
  15318. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[19]);
  15319. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[18]);
  15320. SP_ASM_SQR_ADD(l, h, o, a->dp[17]);
  15321. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15322. r->dp[34] = l;
  15323. l = h;
  15324. h = o;
  15325. o = 0;
  15326. SP_ASM_MUL_SET(tl, th, to, a->dp[12], a->dp[23]);
  15327. SP_ASM_MUL_ADD(tl, th, to, a->dp[13], a->dp[22]);
  15328. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[21]);
  15329. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[20]);
  15330. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[19]);
  15331. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[18]);
  15332. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15333. r->dp[35] = l;
  15334. l = h;
  15335. h = o;
  15336. o = 0;
  15337. SP_ASM_MUL_SET(tl, th, to, a->dp[13], a->dp[23]);
  15338. SP_ASM_MUL_ADD(tl, th, to, a->dp[14], a->dp[22]);
  15339. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[21]);
  15340. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[20]);
  15341. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[19]);
  15342. SP_ASM_SQR_ADD(l, h, o, a->dp[18]);
  15343. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15344. r->dp[36] = l;
  15345. l = h;
  15346. h = o;
  15347. o = 0;
  15348. SP_ASM_MUL_SET(tl, th, to, a->dp[14], a->dp[23]);
  15349. SP_ASM_MUL_ADD(tl, th, to, a->dp[15], a->dp[22]);
  15350. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[21]);
  15351. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[20]);
  15352. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[19]);
  15353. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15354. r->dp[37] = l;
  15355. l = h;
  15356. h = o;
  15357. o = 0;
  15358. SP_ASM_MUL_SET(tl, th, to, a->dp[15], a->dp[23]);
  15359. SP_ASM_MUL_ADD(tl, th, to, a->dp[16], a->dp[22]);
  15360. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[21]);
  15361. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[20]);
  15362. SP_ASM_SQR_ADD(l, h, o, a->dp[19]);
  15363. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15364. r->dp[38] = l;
  15365. l = h;
  15366. h = o;
  15367. o = 0;
  15368. SP_ASM_MUL_SET(tl, th, to, a->dp[16], a->dp[23]);
  15369. SP_ASM_MUL_ADD(tl, th, to, a->dp[17], a->dp[22]);
  15370. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[21]);
  15371. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[20]);
  15372. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15373. r->dp[39] = l;
  15374. l = h;
  15375. h = o;
  15376. o = 0;
  15377. SP_ASM_MUL_SET(tl, th, to, a->dp[17], a->dp[23]);
  15378. SP_ASM_MUL_ADD(tl, th, to, a->dp[18], a->dp[22]);
  15379. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[21]);
  15380. SP_ASM_SQR_ADD(l, h, o, a->dp[20]);
  15381. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15382. r->dp[40] = l;
  15383. l = h;
  15384. h = o;
  15385. o = 0;
  15386. SP_ASM_MUL_SET(tl, th, to, a->dp[18], a->dp[23]);
  15387. SP_ASM_MUL_ADD(tl, th, to, a->dp[19], a->dp[22]);
  15388. SP_ASM_MUL_ADD(tl, th, to, a->dp[20], a->dp[21]);
  15389. SP_ASM_ADD_DBL_3(l, h, o, tl, th, to);
  15390. r->dp[41] = l;
  15391. l = h;
  15392. h = o;
  15393. o = 0;
  15394. SP_ASM_MUL_ADD2(l, h, o, a->dp[19], a->dp[23]);
  15395. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[22]);
  15396. SP_ASM_SQR_ADD(l, h, o, a->dp[21]);
  15397. r->dp[42] = l;
  15398. l = h;
  15399. h = o;
  15400. o = 0;
  15401. SP_ASM_MUL_ADD2(l, h, o, a->dp[20], a->dp[23]);
  15402. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[22]);
  15403. r->dp[43] = l;
  15404. l = h;
  15405. h = o;
  15406. o = 0;
  15407. SP_ASM_MUL_ADD2(l, h, o, a->dp[21], a->dp[23]);
  15408. SP_ASM_SQR_ADD(l, h, o, a->dp[22]);
  15409. r->dp[44] = l;
  15410. l = h;
  15411. h = o;
  15412. o = 0;
  15413. SP_ASM_MUL_ADD2(l, h, o, a->dp[22], a->dp[23]);
  15414. r->dp[45] = l;
  15415. l = h;
  15416. h = o;
  15417. SP_ASM_SQR_ADD_NO(l, h, a->dp[23]);
  15418. r->dp[46] = l;
  15419. r->dp[47] = h;
  15420. XMEMCPY(r->dp, t, 24 * sizeof(sp_int_digit));
  15421. r->used = 48;
  15422. sp_clamp(r);
  15423. }
  15424. #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
  15425. if (t != NULL) {
  15426. XFREE(t, NULL, DYNAMIC_TYPE_BIGINT);
  15427. }
  15428. #endif
  15429. return err;
  15430. }
  15431. #endif /* SP_INT_DIGITS >= 48 */
  15432. #if SP_INT_DIGITS >= 64
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation: split a = a1 * 2^(16 * SP_WORD_SIZE) + a0, then
 *   a^2 = z2 * 2^(32 * SP_WORD_SIZE) + z1 * 2^(16 * SP_WORD_SIZE) + z0
 * where z0 = a0^2, z2 = a1^2 and z1 = (a0 + a1)^2 - z0 - z2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_32(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;     /* Low word of the running digit sum. */
    sp_int_digit h;     /* Carry word of the running digit sum. */
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the a0 + a1 digit addition. */
    DECL_SP_INT(a1, 16);
    DECL_SP_INT_ARRAY(z, 33, 2);

    ALLOC_SP_INT(a1, 16, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 33, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 aliases r: the low square is computed directly into the result. */
        z0 = r;

        /* a1 = high 16 digits of a. */
        XMEMCPY(a1->dp, &a->dp[16], sizeof(sp_int_digit) * 16);
        a1->used = 16;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_16(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1, digit-wise with carry; final carry kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_16(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 (square of the truncated sum; carry ca is
         * folded in below). */
        err = _sp_sqr_16(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 32) + ((z1 - z0 - z2) << 16) + z0 */
        /* r = z0 : already in place (z0 aliases r). */
        /* Fold in the carry of a0 + a1: when ca is set, (a0 + a1)^2 gains
         * 2 * (a0 + a1) shifted up 16 digits, plus 1 in the top digit. */
        z1->dp[32] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 16];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 16] = l;
            l = h;
            h = 0;
            for (i = 1; i < 16; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 16]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 16] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[32] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 32; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the extra digit. */
        z1->dp[i] += l;
        /* r += z1 << 16 */
        l = 0;
        h = 0;
        for (i = 0; i < 16; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 16]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet initialized: just z1 + carry. */
        for (; i < 33; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 16] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 17; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        for (; i < 32; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        r->used = 64;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  15564. #endif /* SP_INT_DIGITS >= 64 */
  15565. #if SP_INT_DIGITS >= 96
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation: split a = a1 * 2^(24 * SP_WORD_SIZE) + a0, then
 *   a^2 = z2 * 2^(48 * SP_WORD_SIZE) + z1 * 2^(24 * SP_WORD_SIZE) + z0
 * where z0 = a0^2, z2 = a1^2 and z1 = (a0 + a1)^2 - z0 - z2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_48(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;     /* Low word of the running digit sum. */
    sp_int_digit h;     /* Carry word of the running digit sum. */
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the a0 + a1 digit addition. */
    DECL_SP_INT(a1, 24);
    DECL_SP_INT_ARRAY(z, 49, 2);

    ALLOC_SP_INT(a1, 24, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 49, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 aliases r: the low square is computed directly into the result. */
        z0 = r;

        /* a1 = high 24 digits of a. */
        XMEMCPY(a1->dp, &a->dp[24], sizeof(sp_int_digit) * 24);
        a1->used = 24;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_24(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1, digit-wise with carry; final carry kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_24(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 (square of the truncated sum; carry ca is
         * folded in below). */
        err = _sp_sqr_24(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 48) + ((z1 - z0 - z2) << 24) + z0 */
        /* r = z0 : already in place (z0 aliases r). */
        /* Fold in the carry of a0 + a1: when ca is set, (a0 + a1)^2 gains
         * 2 * (a0 + a1) shifted up 24 digits, plus 1 in the top digit. */
        z1->dp[48] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 24];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 24] = l;
            l = h;
            h = 0;
            for (i = 1; i < 24; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 24]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 24] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[48] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 48; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the extra digit. */
        z1->dp[i] += l;
        /* r += z1 << 24 */
        l = 0;
        h = 0;
        for (i = 0; i < 24; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 24]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet initialized: just z1 + carry. */
        for (; i < 49; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 24] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 25; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        for (; i < 48; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        r->used = 96;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  15697. #endif /* SP_INT_DIGITS >= 96 */
  15698. #if SP_INT_DIGITS >= 128
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation: split a = a1 * 2^(32 * SP_WORD_SIZE) + a0, then
 *   a^2 = z2 * 2^(64 * SP_WORD_SIZE) + z1 * 2^(32 * SP_WORD_SIZE) + z0
 * where z0 = a0^2, z2 = a1^2 and z1 = (a0 + a1)^2 - z0 - z2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_64(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;     /* Low word of the running digit sum. */
    sp_int_digit h;     /* Carry word of the running digit sum. */
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the a0 + a1 digit addition. */
    DECL_SP_INT(a1, 32);
    DECL_SP_INT_ARRAY(z, 65, 2);

    ALLOC_SP_INT(a1, 32, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 65, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 aliases r: the low square is computed directly into the result. */
        z0 = r;

        /* a1 = high 32 digits of a. */
        XMEMCPY(a1->dp, &a->dp[32], sizeof(sp_int_digit) * 32);
        a1->used = 32;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_32(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1, digit-wise with carry; final carry kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_32(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 (square of the truncated sum; carry ca is
         * folded in below). */
        err = _sp_sqr_32(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 64) + ((z1 - z0 - z2) << 32) + z0 */
        /* r = z0 : already in place (z0 aliases r). */
        /* Fold in the carry of a0 + a1: when ca is set, (a0 + a1)^2 gains
         * 2 * (a0 + a1) shifted up 32 digits, plus 1 in the top digit. */
        z1->dp[64] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 32];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 32] = l;
            l = h;
            h = 0;
            for (i = 1; i < 32; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 32]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 32] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[64] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 64; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the extra digit. */
        z1->dp[i] += l;
        /* r += z1 << 32 */
        l = 0;
        h = 0;
        for (i = 0; i < 32; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 32]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet initialized: just z1 + carry. */
        for (; i < 65; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 32] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 64 */
        l = 0;
        h = 0;
        for (i = 0; i < 33; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 64]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        for (; i < 64; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 64] = l;
            l = h;
            h = 0;
        }
        r->used = 128;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  15830. #endif /* SP_INT_DIGITS >= 128 */
  15831. #if SP_INT_DIGITS >= 192
/* Square a and store in r. r = a * a
 *
 * Karatsuba implementation: split a = a1 * 2^(48 * SP_WORD_SIZE) + a0, then
 *   a^2 = z2 * 2^(96 * SP_WORD_SIZE) + z1 * 2^(48 * SP_WORD_SIZE) + z0
 * where z0 = a0^2, z2 = a1^2 and z1 = (a0 + a1)^2 - z0 - z2.
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
static int _sp_sqr_96(const sp_int* a, sp_int* r)
{
    int err = MP_OKAY;
    unsigned int i;
    sp_int_digit l;     /* Low word of the running digit sum. */
    sp_int_digit h;     /* Carry word of the running digit sum. */
    sp_int* z0;
    sp_int* z1;
    sp_int* z2;
    sp_int_digit ca;    /* Carry out of the a0 + a1 digit addition. */
    DECL_SP_INT(a1, 48);
    DECL_SP_INT_ARRAY(z, 97, 2);

    ALLOC_SP_INT(a1, 48, err, NULL);
    ALLOC_SP_INT_ARRAY(z, 97, 2, err, NULL);
    if (err == MP_OKAY) {
        z1 = z[0];
        z2 = z[1];
        /* z0 aliases r: the low square is computed directly into the result. */
        z0 = r;

        /* a1 = high 48 digits of a. */
        XMEMCPY(a1->dp, &a->dp[48], sizeof(sp_int_digit) * 48);
        a1->used = 48;

        /* z2 = a1 ^ 2 */
        err = _sp_sqr_48(a1, z2);
    }
    if (err == MP_OKAY) {
        /* a1 = a0 + a1, digit-wise with carry; final carry kept in ca. */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, a1->dp[i]);
            SP_ASM_ADDC(l, h, a->dp[i]);
            a1->dp[i] = l;
            l = h;
            h = 0;
        }
        ca = l;
        /* z0 = a0 ^ 2 */
        err = _sp_sqr_48(a, z0);
    }
    if (err == MP_OKAY) {
        /* z1 = (a0 + a1) ^ 2 (square of the truncated sum; carry ca is
         * folded in below). */
        err = _sp_sqr_48(a1, z1);
    }
    if (err == MP_OKAY) {
        /* r = (z2 << 96) + ((z1 - z0 - z2) << 48) + z0 */
        /* r = z0 : already in place (z0 aliases r). */
        /* Fold in the carry of a0 + a1: when ca is set, (a0 + a1)^2 gains
         * 2 * (a0 + a1) shifted up 48 digits, plus 1 in the top digit. */
        z1->dp[96] = ca;
        l = 0;
        if (ca) {
            l = z1->dp[0 + 48];
            h = 0;
            SP_ASM_ADDC(l, h, a1->dp[0]);
            SP_ASM_ADDC(l, h, a1->dp[0]);
            z1->dp[0 + 48] = l;
            l = h;
            h = 0;
            for (i = 1; i < 48; i++) {
                SP_ASM_ADDC(l, h, z1->dp[i + 48]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                SP_ASM_ADDC(l, h, a1->dp[i]);
                z1->dp[i + 48] = l;
                l = h;
                h = 0;
            }
        }
        z1->dp[96] += l;
        /* z1 = z1 - z0 - z2 */
        l = z1->dp[0];
        h = 0;
        SP_ASM_SUBB(l, h, z0->dp[0]);
        SP_ASM_SUBB(l, h, z2->dp[0]);
        z1->dp[0] = l;
        l = h;
        h = 0;
        for (i = 1; i < 96; i++) {
            l += z1->dp[i];
            SP_ASM_SUBB(l, h, z0->dp[i]);
            SP_ASM_SUBB(l, h, z2->dp[i]);
            z1->dp[i] = l;
            l = h;
            h = 0;
        }
        /* Propagate the final borrow/carry into the extra digit. */
        z1->dp[i] += l;
        /* r += z1 << 48 */
        l = 0;
        h = 0;
        for (i = 0; i < 48; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 48]);
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* Digits of r above z0 are not yet initialized: just z1 + carry. */
        for (; i < 97; i++) {
            SP_ASM_ADDC(l, h, z1->dp[i]);
            r->dp[i + 48] = l;
            l = h;
            h = 0;
        }
        /* r += z2 << 96 */
        l = 0;
        h = 0;
        for (i = 0; i < 49; i++) {
            SP_ASM_ADDC(l, h, r->dp[i + 96]);
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        for (; i < 96; i++) {
            SP_ASM_ADDC(l, h, z2->dp[i]);
            r->dp[i + 96] = l;
            l = h;
            h = 0;
        }
        r->used = 192;
        sp_clamp(r);
    }

    FREE_SP_INT_ARRAY(z, NULL);
    FREE_SP_INT(a1, NULL);
    return err;
}
  15963. #endif /* SP_INT_DIGITS >= 192 */
  15964. #endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
  15965. #endif /* !WOLFSSL_SP_SMALL */
/* Square a and store in r. r = a * a
 *
 * @param  [in]   a  SP integer to square.
 * @param  [out]  r  SP integer result.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or r is NULL, or the result will be too big for fixed
 *          data length.
 * @return  MP_MEM when dynamic memory allocation fails.
 */
int sp_sqr(const sp_int* a, sp_int* r)
{
#if defined(WOLFSSL_SP_MATH) && defined(WOLFSSL_SP_SMALL)
    /* Smallest code size: reuse the generic multiplier. */
    return sp_mul(a, a, r);
#else
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (r == NULL)) {
        err = MP_VAL;
    }
    /* Need extra digit during calculation. */
    if ((err == MP_OKAY) && (a->used * 2 > r->size)) {
        err = MP_VAL;
    }

#if 0
    if (err == MP_OKAY) {
        sp_print(a, "a");
    }
#endif

    if (err == MP_OKAY) {
        if (a->used == 0) {
            /* 0 squared is 0. */
            _sp_zero(r);
        }
        else
        /* Dispatch to a fixed-digit-count specialised squarer when one is
         * compiled in for a->used; otherwise fall through to generic
         * _sp_sqr() at the bottom of the chain. */
#ifndef WOLFSSL_SP_SMALL
#if !defined(WOLFSSL_HAVE_SP_ECC) && defined(HAVE_ECC)
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 256)
        if (a->used == 4) {
            err = _sp_sqr_4(a, r);
        }
        else
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 64 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if (a->used == 6) {
            err = _sp_sqr_6(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 64 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 256)
#ifdef SQR_MUL_ASM
        if (a->used == 8) {
            err = _sp_sqr_8(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#if (SP_WORD_SIZE == 32 && SP_INT_BITS >= 384)
#ifdef SQR_MUL_ASM
        if (a->used == 12) {
            err = _sp_sqr_12(a, r);
        }
        else
#endif /* SQR_MUL_ASM */
#endif /* SP_WORD_SIZE == 32 */
#endif /* !WOLFSSL_HAVE_SP_ECC && HAVE_ECC */
#if defined(SQR_MUL_ASM) && (defined(WOLFSSL_SP_INT_LARGE_COMBA) || \
    (!defined(WOLFSSL_SP_MATH) && defined(WOLFCRYPT_HAVE_SAKKE) && \
    (SP_WORD_SIZE == 64)))
#if SP_INT_DIGITS >= 32
        if (a->used == 16) {
            err = _sp_sqr_16(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 32 */
#endif /* SQR_MUL_ASM && (WOLFSSL_SP_INT_LARGE_COMBA || !WOLFSSL_SP_MATH &&
        * WOLFCRYPT_HAVE_SAKKE && SP_WORD_SIZE == 64 */
#if defined(SQR_MUL_ASM) && defined(WOLFSSL_SP_INT_LARGE_COMBA)
#if SP_INT_DIGITS >= 48
        if (a->used == 24) {
            err = _sp_sqr_24(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 48 */
#if SP_INT_DIGITS >= 64
        if (a->used == 32) {
            err = _sp_sqr_32(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 64 */
#if SP_INT_DIGITS >= 96
        if (a->used == 48) {
            err = _sp_sqr_48(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 96 */
#if SP_INT_DIGITS >= 128
        if (a->used == 64) {
            err = _sp_sqr_64(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 128 */
#if SP_INT_DIGITS >= 192
        if (a->used == 96) {
            err = _sp_sqr_96(a, r);
        }
        else
#endif /* SP_INT_DIGITS >= 192 */
#endif /* SQR_MUL_ASM && WOLFSSL_SP_INT_LARGE_COMBA */
#endif /* !WOLFSSL_SP_SMALL */
        {
            /* Generic squaring for any digit count. */
            err = _sp_sqr(a, r);
        }
    }

#ifdef WOLFSSL_SP_INT_NEGATIVE
    if (err == MP_OKAY) {
        /* A square is never negative. */
        r->sign = MP_ZPOS;
    }
#endif

#if 0
    if (err == MP_OKAY) {
        sp_print(r, "rsqr");
    }
#endif

    return err;
#endif /* WOLFSSL_SP_MATH && WOLFSSL_SP_SMALL */
}
  16093. /* END SP_SQR implementations */
  16094. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH || HAVE_ECC ||
  16095. * (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY) */
  16096. #if defined(WOLFSSL_SP_MATH_ALL) || \
  16097. (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \
  16098. !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || defined(HAVE_ECC)
  16099. /* Square a mod m and store in r: r = (a * a) mod m
  16100. *
  16101. * @param [in] a SP integer to square.
  16102. * @param [in] m SP integer that is the modulus.
  16103. * @param [out] r SP integer result.
  16104. *
  16105. * @return MP_OKAY on success.
  16106. * @return MP_MEM when dynamic memory allocation fails.
  16107. */
  16108. static int _sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
  16109. {
  16110. int err = MP_OKAY;
  16111. /* Create temporary for multiplication result. */
  16112. DECL_SP_INT(t, a->used * 2);
  16113. ALLOC_SP_INT(t, a->used * 2, err, NULL);
  16114. if (err == MP_OKAY) {
  16115. err = sp_init_size(t, a->used * 2);
  16116. }
  16117. /* Square and reduce. */
  16118. if (err == MP_OKAY) {
  16119. err = sp_sqr(a, t);
  16120. }
  16121. if (err == MP_OKAY) {
  16122. err = sp_mod(t, m, r);
  16123. }
  16124. /* Dispose of an allocated SP int. */
  16125. FREE_SP_INT(t, NULL);
  16126. return err;
  16127. }
  16128. /* Square a mod m and store in r: r = (a * a) mod m
  16129. *
  16130. * @param [in] a SP integer to square.
  16131. * @param [in] m SP integer that is the modulus.
  16132. * @param [out] r SP integer result.
  16133. *
  16134. * @return MP_OKAY on success.
  16135. * @return MP_VAL when a, m or r is NULL; or m is 0; or a squared is too big
  16136. * for fixed data length.
  16137. * @return MP_MEM when dynamic memory allocation fails.
  16138. */
  16139. int sp_sqrmod(const sp_int* a, const sp_int* m, sp_int* r)
  16140. {
  16141. int err = MP_OKAY;
  16142. /* Validate parameters. */
  16143. if ((a == NULL) || (m == NULL) || (r == NULL)) {
  16144. err = MP_VAL;
  16145. }
  16146. /* Ensure r has space for intermediate result. */
  16147. if ((err == MP_OKAY) && (r != m) && (a->used * 2 > r->size)) {
  16148. err = MP_VAL;
  16149. }
  16150. /* Ensure a is not too big. */
  16151. if ((err == MP_OKAY) && (r == m) && (a->used * 2 > SP_INT_DIGITS)) {
  16152. err = MP_VAL;
  16153. }
  16154. /* Use r as intermediate result if not same as pointer m which is needed
  16155. * after first intermediate result.
  16156. */
  16157. if ((err == MP_OKAY) && (r != m)) {
  16158. /* Square and reduce. */
  16159. err = sp_sqr(a, r);
  16160. if (err == MP_OKAY) {
  16161. err = sp_mod(r, m, r);
  16162. }
  16163. }
  16164. else if (err == MP_OKAY) {
  16165. /* Do operation with temporary. */
  16166. err = _sp_sqrmod(a, m, r);
  16167. }
  16168. return err;
  16169. }
#endif /* WOLFSSL_SP_MATH_ALL || (!NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY &&
        * !WOLFSSL_RSA_PUBLIC_ONLY) || !NO_DH || HAVE_ECC */
  16171. /**********************
  16172. * Montgomery functions
  16173. **********************/
  16174. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_HAVE_SP_DH) || \
  16175. defined(WOLFCRYPT_HAVE_ECCSI) || defined(WOLFCRYPT_HAVE_SAKKE) || \
  16176. defined(OPENSSL_ALL)
/* Reduce a number in Montgomery form.
 *
 * Assumes a and m are not NULL and m is not 0.
 *
 * DigitMask(a,i) := mask out the 'i'th digit in place.
 *
 * Algorithm:
 *  1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
 *  2. For i = 0..NumDigits(m)-1
 *   2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK
 *   2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask
 *   2.3. a += mu * DigitMask(m, 0)
 *   2.4. For j = 1 up to NumDigits(m)-2
 *    2.4.1 a += mu * DigitMask(m, j)
 *   2.5 a += mu * DigitMask(m, NumDigits(m)-1))
 *  3. a >>= NumBits(m)
 *  4. a = a % m
 *
 * @param  [in,out]  a   SP integer to Montgomery reduce.
 * @param  [in]      m   SP integer that is the modulus.
 * @param  [in]      mp  SP integer digit that is the bottom digit of inv(-m).
 *
 * @return  MP_OKAY on success.
 */
static int _sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp)
{
#if !defined(SQR_MUL_ASM)
    unsigned int i;
    int bits;
    sp_int_word w;
    sp_int_digit mu;

#if 0
    sp_print(a, "a");
    sp_print(m, "m");
#endif

    /* Count bits in modulus. */
    bits = sp_count_bits(m);

    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
    for (i = a->used; i < m->used * 2; i++) {
        a->dp[i] = 0;
    }

    /* Special case when modulus is 1 digit or less. */
    if (m->used <= 1) {
        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
        mu = mp * a->dp[0];
        /* a += mu * m */
        w = a->dp[0];
        w += (sp_int_word)mu * m->dp[0];
        a->dp[0] = (sp_int_digit)w;
        w >>= SP_WORD_SIZE;
        /* Propagate the carry into the next two digits. */
        w += a->dp[1];
        a->dp[1] = (sp_int_digit)w;
        w >>= SP_WORD_SIZE;
        a->dp[2] = (sp_int_digit)w;
        a->used = 3;
        /* mp is SP_WORD_SIZE */
        bits = SP_WORD_SIZE;
    }
    else {
        /* 1. mask = (1 << (NumBits(m) % WORD_SIZE)) - 1
         *    Mask when last digit of modulus doesn't have highest bit set.
         */
        sp_int_digit mask = (sp_int_digit)
            (((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1);
        /* Overflow. */
        sp_int_word o = 0;

        /* 2. For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++) {
            unsigned int j;

            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[i];
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
            if ((i == m->used - 1) && (mask != 0)) {
                mu &= mask;
            }

            /* 2.3. a += mu * DigitMask(m, 0) */
            w = a->dp[i];
            w += (sp_int_word)mu * m->dp[0];
            a->dp[i] = (sp_int_digit)w;
            w >>= SP_WORD_SIZE;
            /* 2.4. For j = 1 up to NumDigits(m)-2 */
            for (j = 1; j < m->used - 1; j++) {
                /* 2.4.1 a += mu * DigitMask(m, j) */
                w += a->dp[i + j];
                w += (sp_int_word)mu * m->dp[j];
                a->dp[i + j] = (sp_int_digit)w;
                w >>= SP_WORD_SIZE;
            }
            /* Handle overflow. */
            w += o;
            w += a->dp[i + j];
            o = (sp_int_digit)(w >> SP_WORD_SIZE);
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1)) */
            w = ((sp_int_word)mu * m->dp[j]) + (sp_int_digit)w;
            a->dp[i + j] = (sp_int_digit)w;
            w >>= SP_WORD_SIZE;
            o += w;
        }
        /* Handle overflow. */
        o += a->dp[m->used * 2 - 1];
        a->dp[m->used * 2 - 1] = (sp_int_digit)o;
        o >>= SP_WORD_SIZE;
        a->dp[m->used * 2] = (sp_int_digit)o;
        a->used = m->used * 2 + 1;
    }

    /* Remove leading zeros. */
    sp_clamp(a);

    /* 3. a >>= NumBits(m) */
    (void)sp_rshb(a, bits, a);

    /* 4. a = a mod m */
    if (_sp_cmp_abs(a, m) != MP_LT) {
        _sp_sub_off(a, m, a, 0);
    }

#if 0
    sp_print(a, "rr");
#endif

    return MP_OKAY;
#else /* !SQR_MUL_ASM */
    unsigned int i;
    unsigned int j;
    int bits;
    sp_int_digit mu;
    sp_int_digit o;
    sp_int_digit mask;

#if 0
    sp_print(a, "a");
    sp_print(m, "m");
#endif

    bits = sp_count_bits(m);
    /* Mask of used bits in the top digit of m; 0 when the top digit's
     * highest bit is set. */
    mask = ((sp_int_digit)1 << (bits & (SP_WORD_SIZE - 1))) - 1;

    /* Adding numbers into m->used * 2 digits - zero out unused digits. */
    for (i = a->used; i < m->used * 2; i++) {
        a->dp[i] = 0;
    }

    /* Special case when modulus is 1 digit or less. */
    if (m->used <= 1) {
        sp_int_digit l;
        sp_int_digit h;

        /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
        mu = mp * a->dp[0];
        /* a += mu * m */
        l = a->dp[0];
        h = 0;
        SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
        a->dp[0] = l;
        l = h;
        h = 0;
        SP_ASM_ADDC(l, h, a->dp[1]);
        a->dp[1] = l;
        a->dp[2] = h;
        a->used = m->used * 2 + 1;
        /* mp is SP_WORD_SIZE */
        bits = SP_WORD_SIZE;
    }
#if !defined(WOLFSSL_SP_MATH) && defined(HAVE_ECC)
#if SP_WORD_SIZE == 64
#if SP_INT_DIGITS >= 8
    /* Unrolled path for a 4-digit modulus whose top bit is set (mask == 0).
     * The shift down by one digit per round is folded into the loop, so the
     * final right shift (step 3) is not needed and the function returns
     * early. */
    else if ((m->used == 4) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;

        l = 0;
        h = 0;
        o = 0;
        o2 = 0;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < 4; i++) {
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[0];
            l = a->dp[0];
            /* a = (a + mu * m) >> WORD_SIZE
             * Only a->dp[0..2] are rewritten each round; a->dp[i + 3] is the
             * incoming high digit of the shrinking window. */
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[1]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
            a->dp[0] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[2]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
            a->dp[1] = l;
            l = h;
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, a->dp[i + 3]);
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[3]);
            a->dp[2] = l;
            o = h;
            l = h;
            h = 0;
        }
        /* Handle overflow. */
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[7]);
        a->dp[3] = l;
        a->dp[4] = h;
        a->used = 5;
        /* Remove leading zeros. */
        sp_clamp(a);
        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
        return MP_OKAY;
    }
#endif /* SP_INT_DIGITS >= 8 */
#if SP_INT_DIGITS >= 12
    /* Unrolled path for a 6-digit modulus whose top bit is set (mask == 0).
     * Same rolling-window structure as the 4-digit case above. */
    else if ((m->used == 6) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;

        l = 0;
        h = 0;
        o = 0;
        o2 = 0;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < 6; i++) {
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * a->dp[0];
            l = a->dp[0];
            /* a = (a + mu * m) >> WORD_SIZE */
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[0]);
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[1]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[1]);
            a->dp[0] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[2]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[2]);
            a->dp[1] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[3]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[3]);
            a->dp[2] = l;
            l = h;
            h = 0;
            SP_ASM_ADDC(l, h, a->dp[4]);
            SP_ASM_MUL_ADD_NO(l, h, mu, m->dp[4]);
            a->dp[3] = l;
            l = h;
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, a->dp[i + 5]);
            SP_ASM_MUL_ADD(l, h, o2, mu, m->dp[5]);
            a->dp[4] = l;
            o = h;
            l = h;
            h = 0;
        }
        /* Handle overflow. */
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[11]);
        a->dp[5] = l;
        a->dp[6] = h;
        a->used = 7;
        /* Remove leading zeros. */
        sp_clamp(a);
        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
        return MP_OKAY;
    }
#endif /* SP_INT_DIGITS >= 12 */
#elif SP_WORD_SIZE == 32
    /* Shift-as-you-go path for small moduli whose top bit is set
     * (mask == 0) - digits are written back one position down. */
    else if ((m->used <= 12) && (mask == 0)) {
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;
        sp_int_digit* ad;
        const sp_int_digit* md;

        o = 0;
        o2 = 0;
        ad = a->dp;
        /* For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++) {
            md = m->dp;
            /* mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * ad[0];
            /* a = (a + mu * m, 0) >> WORD_SIZE */
            l = ad[0];
            h = 0;
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
            l = h;
            /* Two digits per iteration, writing each result one slot down. */
            for (j = 1; j + 1 < m->used - 1; j += 2) {
                h = 0;
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j - 1] = l;
                l = 0;
                SP_ASM_ADDC(h, l, ad[j + 1]);
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
                ad[j] = h;
            }
            /* Remaining odd digit, if any. */
            for (; j < m->used - 1; j++) {
                h = 0;
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j - 1] = l;
                l = h;
            }
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            SP_ASM_ADDC(l, h, ad[i + j]);
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
            ad[j - 1] = l;
            o = h;
        }
        /* Handle overflow. */
        l = o;
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
        a->dp[m->used - 1] = l;
        a->dp[m->used] = h;
        a->used = m->used + 1;
        /* Remove leading zeros. */
        sp_clamp(a);
        /* a = a mod m */
        if (_sp_cmp_abs(a, m) != MP_LT) {
            _sp_sub_off(a, m, a, 0);
        }
        return MP_OKAY;
    }
#endif /* SP_WORD_SIZE == 64 | 32 */
#endif /* !WOLFSSL_SP_MATH && HAVE_ECC */
    else {
        /* General case: accumulate in place, shift down afterwards with
         * sp_rshb() below. */
        sp_int_digit l;
        sp_int_digit h;
        sp_int_digit o2;
        sp_int_digit* ad;
        const sp_int_digit* md;

        o = 0;
        o2 = 0;
        ad = a->dp;
        /* 2. For i = 0..NumDigits(m)-1 */
        for (i = 0; i < m->used; i++, ad++) {
            md = m->dp;
            /* 2.1. mu = (mp * DigitMask(a, i)) & WORD_MASK */
            mu = mp * ad[0];
            /* 2.2. If i == NumDigits(m)-1 and mask != 0 then mu & = mask */
            if ((i == m->used - 1) && (mask != 0)) {
                mu &= mask;
            }

            /* 2.3 a += mu * DigitMask(m, 0) */
            l = ad[0];
            h = 0;
            SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
            ad[0] = l;
            l = h;
            /* 2.4. For j = 1 up to NumDigits(m)-2, two digits at a time. */
            for (j = 1; j + 1 < m->used - 1; j += 2) {
                h = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(l, h, ad[j + 0]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j + 0] = l;
                l = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(h, l, ad[j + 1]);
                SP_ASM_MUL_ADD_NO(h, l, mu, *(md++));
                ad[j + 1] = h;
            }
            /* Remaining odd digit, if any. */
            for (; j < m->used - 1; j++) {
                h = 0;
                /* 2.4.1. a += mu * DigitMask(m, j) */
                SP_ASM_ADDC(l, h, ad[j]);
                SP_ASM_MUL_ADD_NO(l, h, mu, *(md++));
                ad[j] = l;
                l = h;
            }
            h = o2;
            o2 = 0;
            SP_ASM_ADDC_REG(l, h, o);
            /* 2.5 a += mu * DigitMask(m, NumDigits(m)-1) */
            SP_ASM_ADDC(l, h, ad[j]);
            SP_ASM_MUL_ADD(l, h, o2, mu, *md);
            ad[j] = l;
            o = h;
        }
        /* Handle overflow. */
        l = o;
        h = o2;
        SP_ASM_ADDC(l, h, a->dp[m->used * 2 - 1]);
        a->dp[m->used * 2 - 1] = l;
        a->dp[m->used * 2] = h;
        a->used = m->used * 2 + 1;
    }

    /* Remove leading zeros. */
    sp_clamp(a);
    /* 3. a >>= NumBits(m) */
    (void)sp_rshb(a, bits, a);
    /* 4. a = a mod m */
    if (_sp_cmp_abs(a, m) != MP_LT) {
        _sp_sub_off(a, m, a, 0);
    }

#if 0
    sp_print(a, "rr");
#endif

    return MP_OKAY;
#endif /* !SQR_MUL_ASM */
}
  16582. #if !defined(WOLFSSL_RSA_VERIFY_ONLY) || \
  16583. (defined(WOLFSSL_SP_MATH_ALL) && defined(HAVE_ECC))
  16584. /* Reduce a number in Montgomery form.
  16585. *
  16586. * @param [in,out] a SP integer to Montgomery reduce.
  16587. * @param [in] m SP integer that is the modulus.
  16588. * @param [in] mp SP integer digit that is the bottom digit of inv(-m).
  16589. *
  16590. * @return MP_OKAY on success.
  16591. * @return MP_VAL when a or m is NULL or m is zero.
  16592. */
  16593. int sp_mont_red(sp_int* a, const sp_int* m, sp_int_digit mp)
  16594. {
  16595. int err;
  16596. /* Validate parameters. */
  16597. if ((a == NULL) || (m == NULL) || sp_iszero(m)) {
  16598. err = MP_VAL;
  16599. }
  16600. /* Ensure a has enough space for calculation. */
  16601. else if (a->size < m->used * 2 + 1) {
  16602. err = MP_VAL;
  16603. }
  16604. else {
  16605. /* Perform Montogomery Reduction. */
  16606. err = _sp_mont_red(a, m, mp);
  16607. }
  16608. return err;
  16609. }
  16610. #endif
  16611. /* Calculate the bottom digit of the inverse of negative m.
  16612. * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
  16613. *
  16614. * Used when performing Montgomery Reduction.
  16615. * m must be odd.
  16616. * Jeffrey Hurchalla’s method.
  16617. * https://arxiv.org/pdf/2204.04342.pdf
  16618. *
  16619. * @param [in] m SP integer that is the modulus.
  16620. * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
  16621. */
  16622. static void _sp_mont_setup(const sp_int* m, sp_int_digit* rho)
  16623. {
  16624. sp_int_digit d = m->dp[0];
  16625. sp_int_digit x = (3 * d) ^ 2;
  16626. sp_int_digit y = 1 - d * x;
  16627. #if SP_WORD_SIZE >= 16
  16628. x *= 1 + y; y *= y;
  16629. #endif
  16630. #if SP_WORD_SIZE >= 32
  16631. x *= 1 + y; y *= y;
  16632. #endif
  16633. #if SP_WORD_SIZE >= 64
  16634. x *= 1 + y; y *= y;
  16635. #endif
  16636. x *= 1 + y;
  16637. /* rho = -1/m mod d, subtract x (unsigned) from 0, assign negative */
  16638. *rho = (sp_int_digit)((sp_int_sdigit)0 - (sp_int_sdigit)x);
  16639. }
  16640. /* Calculate the bottom digit of the inverse of negative m.
  16641. * (rho * m) mod 2^n = -1, where n is the number of bits in a digit.
  16642. *
  16643. * Used when performing Montgomery Reduction.
  16644. *
  16645. * @param [in] m SP integer that is the modulus.
  16646. * @param [out] mp SP integer digit that is the bottom digit of inv(-m).
  16647. *
  16648. * @return MP_OKAY on success.
  16649. * @return MP_VAL when m or rho is NULL.
  16650. */
  16651. int sp_mont_setup(const sp_int* m, sp_int_digit* rho)
  16652. {
  16653. int err = MP_OKAY;
  16654. /* Validate parameters. */
  16655. if ((m == NULL) || (rho == NULL)) {
  16656. err = MP_VAL;
  16657. }
  16658. /* Calculation only works with odd modulus. */
  16659. if ((err == MP_OKAY) && !sp_isodd(m)) {
  16660. err = MP_VAL;
  16661. }
  16662. if (err == MP_OKAY) {
  16663. /* Calculate negative of inverse mod 2^n. */
  16664. _sp_mont_setup(m, rho);
  16665. }
  16666. return err;
  16667. }
/* Calculate the normalization value of m.
 *   norm = 2^k - m, where k is the number of bits in m
 *
 * @param  [out]  norm  SP integer that normalises numbers into Montgomery
 *                      form.
 * @param  [in]   m     SP integer that is the modulus.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when norm or m is NULL, or number of bits in m is maximal.
 */
int sp_mont_norm(sp_int* norm, const sp_int* m)
{
    int err = MP_OKAY;
    unsigned int bits = 0;

    /* Validate parameters. */
    if ((norm == NULL) || (m == NULL)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Find top bit and ensure norm has enough space. */
        bits = (unsigned int)sp_count_bits(m);
        if (bits >= norm->size * SP_WORD_SIZE) {
            err = MP_VAL;
        }
    }
    if (err == MP_OKAY) {
        /* Round up for case when m is less than a word - no advantage in using
         * a smaller mask and would take more operations.
         */
        if (bits < SP_WORD_SIZE) {
            bits = SP_WORD_SIZE;
        }
        /* Smallest number greater than m of form 2^n. */
        _sp_zero(norm);
        err = sp_set_bit(norm, (int)bits);
    }
    if (err == MP_OKAY) {
        /* norm = 2^n % m */
        err = sp_sub(norm, m, norm);
    }
    if ((err == MP_OKAY) && (bits == SP_WORD_SIZE)) {
        /* Sub made norm one word and now finish calculation.
         * (Only reached for a single-word modulus after rounding up.) */
        norm->dp[0] %= m->dp[0];
    }
    if (err == MP_OKAY) {
        /* Remove leading zeros. */
        sp_clamp(norm);
    }

    return err;
}
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_HAVE_SP_DH ||
        * WOLFCRYPT_HAVE_ECCSI || WOLFCRYPT_HAVE_SAKKE || OPENSSL_ALL */
  16720. /*********************************
  16721. * To and from binary and strings.
  16722. *********************************/
  16723. /* Calculate the number of 8-bit values required to represent the
  16724. * multi-precision number.
  16725. *
  16726. * When a is NULL, return s 0.
  16727. *
  16728. * @param [in] a SP integer.
  16729. *
  16730. * @return The count of 8-bit values.
  16731. * @return 0 when a is NULL.
  16732. */
  16733. int sp_unsigned_bin_size(const sp_int* a)
  16734. {
  16735. int cnt = 0;
  16736. if (a != NULL) {
  16737. cnt = (sp_count_bits(a) + 7) / 8;
  16738. }
  16739. return cnt;
  16740. }
/* Convert a number as an array of bytes in big-endian format to a
 * multi-precision number.
 *
 * @param  [out]  a     SP integer.
 * @param  [in]   in    Array of bytes.
 * @param  [in]   inSz  Number of data bytes in array.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when the number is too big to fit in an SP.
 */
int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz)
{
    int err = MP_OKAY;

    /* Validate parameters. NULL in is allowed only with zero length. */
    if ((a == NULL) || ((in == NULL) && (inSz > 0))) {
        err = MP_VAL;
    }

    /* Check a has enough space for number. */
    if ((err == MP_OKAY) && (inSz > (word32)a->size * SP_WORD_SIZEOF)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Load full digits at a time from in. */
        int i;
        int j = 0;

        a->used = (inSz + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;

#if defined(BIG_ENDIAN_ORDER) && !defined(WOLFSSL_SP_INT_DIGIT_ALIGN)
        /* Data endian matches representation of number.
         * Directly copy if we don't have alignment issues.
         * i walks backwards from the least significant byte of in. */
        for (i = (int)(inSz-1); i > SP_WORD_SIZEOF-1; i -= SP_WORD_SIZEOF) {
            a->dp[j++] = *(sp_int_digit*)(in + i - (SP_WORD_SIZEOF - 1));
        }
#else
        /* Construct digit from required number of bytes, least significant
         * input byte into the low bits of each digit. */
        for (i = (int)(inSz-1); i >= SP_WORD_SIZEOF - 1; i -= SP_WORD_SIZEOF) {
            a->dp[j] = ((sp_int_digit)in[i - 0] << 0)
#if SP_WORD_SIZE >= 16
                     | ((sp_int_digit)in[i - 1] << 8)
#endif
#if SP_WORD_SIZE >= 32
                     | ((sp_int_digit)in[i - 2] << 16) |
                       ((sp_int_digit)in[i - 3] << 24)
#endif
#if SP_WORD_SIZE >= 64
                     | ((sp_int_digit)in[i - 4] << 32) |
                       ((sp_int_digit)in[i - 5] << 40) |
                       ((sp_int_digit)in[i - 6] << 48) |
                       ((sp_int_digit)in[i - 7] << 56)
#endif
                       ;
            j++;
        }
#endif

#if SP_WORD_SIZE >= 16
        /* Handle leftovers - fewer than SP_WORD_SIZEOF bytes remain when
         * i >= 0 here. */
        if (i >= 0) {
#ifdef BIG_ENDIAN_ORDER
            int s;

            /* Place remaining bytes into last digit. */
            a->dp[a->used - 1] = 0;
            for (s = 0; i >= 0; i--,s += 8) {
                a->dp[j] |= ((sp_int_digit)in[i]) << s;
            }
#else
            /* Cast digits to an array of bytes so we can insert directly. */
            byte *d = (byte*)a->dp;

            /* Zero out all bytes in last digit. */
            a->dp[a->used - 1] = 0;
            /* Place remaining bytes directly into digit - i is the index of
             * the last leftover byte, so each case falls through to the
             * lower ones. */
            switch (i) {
#if SP_WORD_SIZE >= 64
                case 6: d[inSz - 1 - 6] = in[6]; FALL_THROUGH;
                case 5: d[inSz - 1 - 5] = in[5]; FALL_THROUGH;
                case 4: d[inSz - 1 - 4] = in[4]; FALL_THROUGH;
                case 3: d[inSz - 1 - 3] = in[3]; FALL_THROUGH;
#endif
#if SP_WORD_SIZE >= 32
                case 2: d[inSz - 1 - 2] = in[2]; FALL_THROUGH;
                case 1: d[inSz - 1 - 1] = in[1]; FALL_THROUGH;
#endif
                case 0: d[inSz - 1 - 0] = in[0];
            }
#endif /* LITTLE_ENDIAN_ORDER */
        }
#endif
        /* Remove leading zeros. */
        sp_clamp(a);
    }

    return err;
}
  16831. /* Convert the multi-precision number to an array of bytes in big-endian format.
  16832. *
  16833. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  16834. * to calculate the number of bytes required.
  16835. *
  16836. * @param [in] a SP integer.
  16837. * @param [out] out Array to put encoding into.
  16838. *
  16839. * @return MP_OKAY on success.
  16840. * @return MP_VAL when a or out is NULL.
  16841. */
  16842. int sp_to_unsigned_bin(const sp_int* a, byte* out)
  16843. {
  16844. /* Write assuming output buffer is big enough. */
  16845. return sp_to_unsigned_bin_len(a, out, sp_unsigned_bin_size(a));
  16846. }
/* Convert the multi-precision number to an array of bytes in big-endian
 * format.
 *
 * The array must be large enough for encoded number - use mp_unsigned_bin_size
 * to calculate the number of bytes required.
 * Front-pads the output array with zeros to make number the size of the array.
 *
 * @param  [in]   a      SP integer.
 * @param  [out]  out    Array to put encoding into.
 * @param  [in]   outSz  Size of the array in bytes.
 *
 * @return  MP_OKAY on success.
 * @return  MP_VAL when a or out is NULL.
 */
int sp_to_unsigned_bin_len(const sp_int* a, byte* out, int outSz)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (out == NULL) || (outSz < 0)) {
        err = MP_VAL;
    }

#if SP_WORD_SIZE > 8
    if (err == MP_OKAY) {
        /* Start at the end of the buffer - least significant byte. */
        int j = outSz - 1;

        if (!sp_iszero(a)) {
            unsigned int i;

            /* Put each digit in. */
            for (i = 0; (j >= 0) && (i < a->used); i++) {
                int b;
                sp_int_digit d = a->dp[i];
                /* Place each byte of a digit into the buffer. */
                for (b = 0; b < SP_WORD_SIZE; b += 8) {
                    out[j--] = (byte)d;
                    d >>= 8;
                    /* Stop if the output buffer is filled. */
                    if (j < 0) {
                        /* Buffer too small if digits or non-zero bytes
                         * remain. */
                        if ((i < a->used - 1) || (d > 0)) {
                            err = MP_VAL;
                        }
                        break;
                    }
                }
            }
        }
        /* Front pad buffer with 0s. */
        for (; j >= 0; j--) {
            out[j] = 0;
        }
    }
#else
    /* 8-bit digits: one digit maps to one output byte. */
    if ((err == MP_OKAY) && ((unsigned int)outSz < a->used)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        unsigned int i;
        int j;

        /* Front pad with 0s and copy digits in reverse order. */
        XMEMSET(out, 0, (unsigned int)outSz - a->used);
        for (i = 0, j = outSz - 1; i < a->used; i++, j--) {
            out[j] = a->dp[i];
        }
    }
#endif

    return err;
}
  16911. #if defined(WOLFSSL_SP_MATH_ALL) && !defined(NO_RSA) && \
  16912. !defined(WOLFSSL_RSA_VERIFY_ONLY)
  16913. /* Store the number in big-endian format in array at an offset.
  16914. * The array must be large enough for encoded number - use mp_unsigned_bin_size
  16915. * to calculate the number of bytes required.
  16916. *
  16917. * @param [in] o Offset into array o start encoding.
  16918. * @param [in] a SP integer.
  16919. * @param [out] out Array to put encoding into.
  16920. *
  16921. * @return Index of next byte after data.
  16922. * @return MP_VAL when a or out is NULL.
  16923. */
  16924. int sp_to_unsigned_bin_at_pos(int o, const sp_int* a, unsigned char* out)
  16925. {
  16926. /* Get length of data that will be written. */
  16927. int len = sp_unsigned_bin_size(a);
  16928. /* Write number to buffer at offset. */
  16929. int ret = sp_to_unsigned_bin_len(a, out + o, len);
  16930. if (ret == MP_OKAY) {
  16931. /* Return offset of next byte after number. */
  16932. ret = o + len;
  16933. }
  16934. return ret;
  16935. }
  16936. #endif /* WOLFSSL_SP_MATH_ALL && !NO_RSA && !WOLFSSL_RSA_VERIFY_ONLY */
  16937. #ifdef WOLFSSL_SP_READ_RADIX_16
  16938. /* Convert hexadecimal number as string in big-endian format to a
  16939. * multi-precision number.
  16940. *
  16941. * Assumes negative sign and leading zeros have been stripped.
  16942. *
  16943. * @param [out] a SP integer.
  16944. * @param [in] in NUL terminated string.
  16945. *
  16946. * @return MP_OKAY on success.
  16947. * @return MP_VAL when radix not supported, value is negative, or a character
  16948. * is not valid.
  16949. */
static int _sp_read_radix_16(sp_int* a, const char* in)
{
    int err = MP_OKAY;
    int i;
    /* s: bit position of the next nibble within the current digit. */
    unsigned int s = 0;
    /* j: index of the next digit to store in a->dp. */
    unsigned int j = 0;
    sp_int_digit d;

    /* Make all nibbles in digit 0. */
    d = 0;

    /* Step through string a character at a time starting at end - least
     * significant byte. */
    for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) {
        /* Convert character from hex. */
        int ch = (int)HexCharToByte(in[i]);
        /* Check for invalid character. */
        if (ch < 0) {
            err = MP_VAL;
            break;
        }

        /* Check whether we have filled the digit. */
        if (s == SP_WORD_SIZE) {
            /* Store digit and move index to next in a. */
            a->dp[j++] = d;
            /* Fail if we are out of space in a. */
            if (j >= a->size) {
                err = MP_VAL;
                break;
            }
            /* Set shift back to 0 - lowest nibble. */
            s = 0;
            /* Make all nibbles in digit 0. */
            d = 0;
        }

        /* Put next nibble into digit. */
        d |= ((sp_int_digit)ch) << s;
        /* Update shift for next nibble. */
        s += 4;
    }

    if (err == MP_OKAY) {
        /* If space, store last (possibly partial or zero) digit. */
        if (j < a->size) {
            a->dp[j] = d;
        }
        /* Update used count. For an empty string this stores a single
         * zero digit which sp_clamp() then reduces. */
        a->used = j + 1;
        /* Remove leading zeros. */
        sp_clamp(a);
    }

    return err;
}
  17000. #endif /* WOLFSSL_SP_READ_RADIX_16 */
  17001. #ifdef WOLFSSL_SP_READ_RADIX_10
  17002. /* Convert decimal number as string in big-endian format to a multi-precision
  17003. * number.
  17004. *
  17005. * Assumes negative sign and leading zeros have been stripped.
  17006. *
  17007. * @param [out] a SP integer.
  17008. * @param [in] in NUL terminated string.
  17009. *
  17010. * @return MP_OKAY on success.
  17011. * @return MP_VAL when radix not supported, value is negative, or a character
  17012. * is not valid.
  17013. */
  17014. static int _sp_read_radix_10(sp_int* a, const char* in)
  17015. {
  17016. int err = MP_OKAY;
  17017. int i;
  17018. char ch;
  17019. /* Start with a being zero. */
  17020. _sp_zero(a);
  17021. /* Process all characters. */
  17022. for (i = 0; in[i] != '\0'; i++) {
  17023. /* Get character. */
  17024. ch = in[i];
  17025. /* Check character is valid. */
  17026. if ((ch >= '0') && (ch <= '9')) {
  17027. /* Assume '0'..'9' are continuous valus as characters. */
  17028. ch -= '0';
  17029. }
  17030. else {
  17031. /* Return error on invalid character. */
  17032. err = MP_VAL;
  17033. break;
  17034. }
  17035. /* Multiply a by 10. */
  17036. err = _sp_mul_d(a, 10, a, 0);
  17037. if (err != MP_OKAY) {
  17038. break;
  17039. }
  17040. /* Add character value. */
  17041. err = _sp_add_d(a, (sp_int_digit)ch, a);
  17042. if (err != MP_OKAY) {
  17043. break;
  17044. }
  17045. }
  17046. return err;
  17047. }
  17048. #endif /* WOLFSSL_SP_READ_RADIX_10 */
  17049. #if defined(WOLFSSL_SP_READ_RADIX_16) || defined(WOLFSSL_SP_READ_RADIX_10)
  17050. /* Convert a number as string in big-endian format to a big number.
  17051. * Only supports base-16 (hexadecimal) and base-10 (decimal).
  17052. *
  17053. * Negative values supported when WOLFSSL_SP_INT_NEGATIVE is defined.
  17054. *
  17055. * @param [out] a SP integer.
  17056. * @param [in] in NUL terminated string.
  17057. * @param [in] radix Number of values in a digit.
  17058. *
  17059. * @return MP_OKAY on success.
  17060. * @return MP_VAL when a or in is NULL, radix not supported, value is negative,
  17061. * or a character is not valid.
  17062. */
  17063. int sp_read_radix(sp_int* a, const char* in, int radix)
  17064. {
  17065. int err = MP_OKAY;
  17066. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17067. unsigned int sign = MP_ZPOS;
  17068. #endif
  17069. if ((a == NULL) || (in == NULL)) {
  17070. err = MP_VAL;
  17071. }
  17072. if (err == MP_OKAY) {
  17073. #ifndef WOLFSSL_SP_INT_NEGATIVE
  17074. if (*in == '-') {
  17075. err = MP_VAL;
  17076. }
  17077. else
  17078. #endif
  17079. {
  17080. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17081. if (*in == '-') {
  17082. /* Make number negative if signed string. */
  17083. sign = MP_NEG;
  17084. in++;
  17085. }
  17086. #endif /* WOLFSSL_SP_INT_NEGATIVE */
  17087. /* Skip leading zeros. */
  17088. while (*in == '0') {
  17089. in++;
  17090. }
  17091. if (radix == 16) {
  17092. err = _sp_read_radix_16(a, in);
  17093. }
  17094. #ifdef WOLFSSL_SP_READ_RADIX_10
  17095. else if (radix == 10) {
  17096. err = _sp_read_radix_10(a, in);
  17097. }
  17098. #endif
  17099. else {
  17100. err = MP_VAL;
  17101. }
  17102. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17103. /* Ensure not negative when zero. */
  17104. if (err == MP_OKAY) {
  17105. if (sp_iszero(a)) {
  17106. a->sign = MP_ZPOS;
  17107. }
  17108. else {
  17109. a->sign = sign;
  17110. }
  17111. }
  17112. #endif
  17113. }
  17114. }
  17115. return err;
  17116. }
  17117. #endif /* WOLFSSL_SP_READ_RADIX_16 || WOLFSSL_SP_READ_RADIX_10 */
  17118. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17119. defined(WC_MP_TO_RADIX)
  17120. /* Put the big-endian, hex string encoding of a into str.
  17121. *
  17122. * Assumes str is large enough for result.
  17123. * Use sp_radix_size() to calculate required length.
  17124. *
  17125. * @param [in] a SP integer to convert.
  17126. * @param [out] str String to hold hex string result.
  17127. *
  17128. * @return MP_OKAY on success.
  17129. * @return MP_VAL when a or str is NULL.
  17130. */
int sp_tohex(const sp_int* a, char* str)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* Quick out if number is zero. */
        if (sp_iszero(a) == MP_YES) {
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Make string represent complete bytes ("00" not "0"). */
            *str++ = '0';
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
            *str++ = '0';
        }
        else {
            int i;
            int j;
            sp_int_digit d;

    #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                /* Add negative sign character. */
                *str = '-';
                str++;
            }
    #endif /* WOLFSSL_SP_INT_NEGATIVE */

            /* Start at last digit - most significant digit.
             * a is non-zero and clamped so dp[used-1] != 0. */
            i = (int)(a->used - 1);
            d = a->dp[i];
    #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Find highest non-zero byte in most-significant word. */
            for (j = SP_WORD_SIZE - 8; j >= 0 && i >= 0; j -= 8) {
                /* When a byte at this index is not 0 break out to start
                 * writing.
                 */
                if (((d >> j) & 0xff) != 0) {
                    break;
                }
                /* Skip this digit if it was 0. */
                if (j == 0) {
                    j = SP_WORD_SIZE - 8;
                    d = a->dp[--i];
                }
            }
            /* Start with high nibble of byte. */
            j += 4;
    #else
            /* Find highest non-zero nibble in most-significant word. */
            for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                /* When a nibble at this index is not 0 break out to start
                 * writing.
                 */
                if (((d >> j) & 0xf) != 0) {
                    break;
                }
                /* Skip this digit if it was 0. */
                if (j == 0) {
                    j = SP_WORD_SIZE - 4;
                    d = a->dp[--i];
                }
            }
    #endif /* WC_DISABLE_RADIX_ZERO_PAD */
            /* Write out as much as required from most-significant digit,
             * one hex character (4 bits) per iteration. */
            for (; j >= 0; j -= 4) {
                *(str++) = ByteToHex((byte)(d >> j));
            }
            /* Write rest of digits. */
            for (--i; i >= 0; i--) {
                /* Get digit from memory. */
                d = a->dp[i];
                /* Write out all nibbles of digit, most significant first. */
                for (j = SP_WORD_SIZE - 4; j >= 0; j -= 4) {
                    *(str++) = (char)ByteToHex((byte)(d >> j));
                }
            }
        }
        /* Terminate string. */
        *str = '\0';
    }

    return err;
}
  17213. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17214. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17215. defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \
  17216. defined(WC_MP_TO_RADIX)
  17217. /* Put the big-endian, decimal string encoding of a into str.
  17218. *
  17219. * Assumes str is large enough for result.
  17220. * Use sp_radix_size() to calculate required length.
  17221. *
  17222. * @param [in] a SP integer to convert.
  17223. * @param [out] str String to hold hex string result.
  17224. *
  17225. * @return MP_OKAY on success.
  17226. * @return MP_VAL when a or str is NULL.
  17227. * @return MP_MEM when dynamic memory allocation fails.
  17228. */
int sp_todecimal(const sp_int* a, char* str)
{
    int err = MP_OKAY;
    int i;
    int j;
    sp_int_digit d = 0;

    /* Validate parameters. */
    if ((a == NULL) || (str == NULL)) {
        err = MP_VAL;
    }
    /* Quick out if number is zero. */
    else if (sp_iszero(a) == MP_YES) {
        *str++ = '0';
        *str = '\0';
    }
    /* Temporary below needs a->used + 1 digits; reject inputs too big. */
    else if (a->used >= SP_INT_DIGITS) {
        err = MP_VAL;
    }
    else {
        /* Temporary that is divided by 10. */
        DECL_SP_INT(t, a->used + 1);

        ALLOC_SP_INT_SIZE(t, a->used + 1, err, NULL);
        if (err == MP_OKAY) {
            /* Work on a copy so a is left untouched. */
            _sp_copy(a, t);
        }
        if (err == MP_OKAY) {
    #ifdef WOLFSSL_SP_INT_NEGATIVE
            if (a->sign == MP_NEG) {
                /* Add negative sign character. */
                *str = '-';
                str++;
            }
    #endif /* WOLFSSL_SP_INT_NEGATIVE */

            /* Write out little endian (least significant digit first). */
            i = 0;
            do {
                /* Divide by 10 and get remainder of division. */
                (void)sp_div_d(t, 10, t, &d);
                /* Write out remainder as a character. */
                str[i++] = (char)('0' + d);
            }
            /* Keep going while we there is a value to write. */
            while (!sp_iszero(t));
            /* Terminate string. */
            str[i] = '\0';

            if (err == MP_OKAY) {
                /* Reverse string to big endian (swap ends toward middle). */
                for (j = 0; j <= (i - 1) / 2; j++) {
                    int c = (unsigned char)str[j];
                    str[j] = str[i - 1 - j];
                    str[i - 1 - j] = (char)c;
                }
            }
        }

        FREE_SP_INT(t, NULL);
    }

    return err;
}
  17287. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  17288. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17289. defined(WC_MP_TO_RADIX)
  17290. /* Put the string version, big-endian, of a in str using the given radix.
  17291. *
  17292. * @param [in] a SP integer to convert.
  17293. * @param [out] str String to hold hex string result.
  17294. * @param [in] radix Base of character.
  17295. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  17296. *
  17297. * @return MP_OKAY on success.
  17298. * @return MP_VAL when a or str is NULL, or radix not supported.
  17299. */
  17300. int sp_toradix(const sp_int* a, char* str, int radix)
  17301. {
  17302. int err = MP_OKAY;
  17303. /* Validate parameters. */
  17304. if ((a == NULL) || (str == NULL)) {
  17305. err = MP_VAL;
  17306. }
  17307. /* Handle base 16 if requested. */
  17308. else if (radix == MP_RADIX_HEX) {
  17309. err = sp_tohex(a, str);
  17310. }
  17311. #if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
  17312. defined(HAVE_COMP_KEY)
  17313. /* Handle base 10 if requested. */
  17314. else if (radix == MP_RADIX_DEC) {
  17315. err = sp_todecimal(a, str);
  17316. }
  17317. #endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
  17318. else {
  17319. /* Base not supported. */
  17320. err = MP_VAL;
  17321. }
  17322. return err;
  17323. }
  17324. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17325. #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \
  17326. defined(WC_MP_TO_RADIX)
  17327. /* Calculate the length of the string version, big-endian, of a using the given
  17328. * radix.
  17329. *
  17330. * @param [in] a SP integer to convert.
  17331. * @param [in] radix Base of character.
  17332. * Valid values: MP_RADIX_HEX, MP_RADIX_DEC.
  17333. * @param [out] size The number of characters in encoding.
  17334. *
  17335. * @return MP_OKAY on success.
  17336. * @return MP_VAL when a or size is NULL, or radix not supported.
  17337. */
int sp_radix_size(const sp_int* a, int radix, int* size)
{
    int err = MP_OKAY;

    /* Validate parameters. */
    if ((a == NULL) || (size == NULL)) {
        err = MP_VAL;
    }
    /* Handle base 16 if requested. */
    else if (radix == MP_RADIX_HEX) {
        if (a->used == 0) {
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* 00 and '\0' */
            *size = 2 + 1;
        #else
            /* Zero and '\0' */
            *size = 1 + 1;
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
        }
        else {
            /* Count of nibbles (one hex character per 4 bits). */
            int cnt = (sp_count_bits(a) + 3) / 4;
        #ifndef WC_DISABLE_RADIX_ZERO_PAD
            /* Must have even number of nibbles to have complete bytes. */
            if (cnt & 1) {
                cnt++;
            }
        #endif /* WC_DISABLE_RADIX_ZERO_PAD */
        #ifdef WOLFSSL_SP_INT_NEGATIVE
            /* Add to count of characters for negative sign. */
            if (a->sign == MP_NEG) {
                cnt++;
            }
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
            /* One more for \0 */
            *size = cnt + 1;
        }
    }
#if defined(WOLFSSL_SP_MATH_ALL) || defined(WOLFSSL_KEY_GEN) || \
    defined(HAVE_COMP_KEY)
    /* Handle base 10 if requested. */
    else if (radix == MP_RADIX_DEC) {
        int i;
        sp_int_digit d;

        /* quick out if its zero */
        if (sp_iszero(a) == MP_YES) {
            /* Zero and '\0' */
            *size = 1 + 1;
        }
        else {
            DECL_SP_INT(t, a->used);

            /* Temporary to be divided by 10 (a itself is left unchanged). */
            ALLOC_SP_INT(t, a->used, err, NULL);
            if (err == MP_OKAY) {
                t->size = a->used;
                _sp_copy(a, t);
            }

            if (err == MP_OKAY) {
                /* Count number of times number can be divided by 10 -
                 * equals the number of decimal characters needed. */
                for (i = 0; !sp_iszero(t); i++) {
                    (void)sp_div_d(t, 10, t, &d);
                }
        #ifdef WOLFSSL_SP_INT_NEGATIVE
                /* Add to count of characters for negative sign. */
                if (a->sign == MP_NEG) {
                    i++;
                }
        #endif /* WOLFSSL_SP_INT_NEGATIVE */
                /* One more for \0 */
                *size = i + 1;
            }
            FREE_SP_INT(t, NULL);
        }
    }
#endif /* WOLFSSL_SP_MATH_ALL || WOLFSSL_KEY_GEN || HAVE_COMP_KEY */
    else {
        /* Base not supported. */
        err = MP_VAL;
    }

    return err;
}
  17418. #endif /* (WOLFSSL_SP_MATH_ALL && !WOLFSSL_RSA_VERIFY_ONLY) || WC_MP_TO_RADIX */
  17419. /***************************************
  17420. * Prime number generation and checking.
  17421. ***************************************/
  17422. #if defined(WOLFSSL_KEY_GEN) && (!defined(NO_RSA) || !defined(NO_DH) || \
  17423. !defined(NO_DSA)) && !defined(WC_NO_RNG)
  17424. #ifndef WOLFSSL_SP_MILLER_RABIN_CNT
  17425. /* Always done 8 iterations of Miller-Rabin on check of primality when
  17426. * generating.
  17427. */
  17428. #define WOLFSSL_SP_MILLER_RABIN_CNT 8
  17429. #endif
  17430. /* Generate a random prime for RSA only.
  17431. *
  17432. * @param [out] r SP integer to hold result.
  17433. * @param [in] len Number of bytes in prime. Use -ve to indicate the two
  17434. * lowest bits must be set.
  17435. * @param [in] rng Random number generator.
  17436. * @param [in] heap Heap hint. Unused.
  17437. *
  17438. * @return MP_OKAY on success
  17439. * @return MP_VAL when r or rng is NULL, length is not supported or random
  17440. * number generator fails.
  17441. */
int sp_rand_prime(sp_int* r, int len, WC_RNG* rng, void* heap)
{
    /* Low-bit mask for Blum-Blum-Shub: forces value to 3 mod 4. */
    static const byte USE_BBS = 3;
    int err = MP_OKAY;
    /* Bits OR-ed into lowest digit: 1 makes the candidate odd. */
    byte low_bits = 1;
    int isPrime = MP_NO;
#if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
    int bits = 0;
#endif /* WOLFSSL_SP_MATH_ALL */
    unsigned int digits = 0;

    (void)heap;

    /* Check NULL parameters and 0 is not prime so 0 bytes is invalid. */
    if ((r == NULL) || (rng == NULL) || (len == 0)) {
        err = MP_VAL;
    }

    if (err == MP_OKAY) {
        /* Get type: negative length requests a BBS (Blum) prime. */
        if (len < 0) {
            low_bits = USE_BBS;
            len = -len;
        }

        /* Get number of digits required to handle required number of bytes. */
        digits = ((unsigned int)len + SP_WORD_SIZEOF - 1) / SP_WORD_SIZEOF;
        /* Ensure result has space. */
        if (r->size < digits) {
            err = MP_VAL;
        }
    }

    if (err == MP_OKAY) {
    #ifndef WOLFSSL_SP_MATH_ALL
        /* For minimal maths, support only what's in SP and needed for DH. */
    #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN)
        if (len == 32) {
        }
        else
    #endif /* WOLFSSL_HAVE_SP_DH && WOLFSSL_KEY_GEN */
        /* Generate RSA primes that are half the modulus length. */
    #ifdef WOLFSSL_SP_4096
        if (len == 256) {
            /* Support 2048-bit operations compiled in. */
        }
        else
    #endif
    #ifndef WOLFSSL_SP_NO_3072
        if (len == 192) {
            /* Support 1536-bit operations compiled in. */
        }
        else
    #endif
    #ifndef WOLFSSL_SP_NO_2048
        if (len == 128) {
            /* Support 1024-bit operations compiled in. */
        }
        else
    #endif
        {
            /* Bit length not supported in SP. */
            err = MP_VAL;
        }
    #endif /* !WOLFSSL_SP_MATH_ALL */

    #ifdef WOLFSSL_SP_INT_NEGATIVE
        /* Generated number is always positive. */
        r->sign = MP_ZPOS;
    #endif /* WOLFSSL_SP_INT_NEGATIVE */
        /* Set number of digits that will be used. */
        r->used = digits;
    #if defined(WOLFSSL_SP_MATH_ALL) || defined(BIG_ENDIAN_ORDER)
        /* Calculate number of bits in last digit (0 when digit is full). */
        bits = (len * 8) & SP_WORD_MASK;
    #endif /* WOLFSSL_SP_MATH_ALL || BIG_ENDIAN_ORDER */
    }

    /* Assume the candidate is probably prime and then test until it is proven
     * composite.
     */
    while ((err == MP_OKAY) && (isPrime == MP_NO)) {
#ifdef SHOW_GEN
        printf(".");
        fflush(stdout);
#endif /* SHOW_GEN */
        /* Generate bytes into digit array. */
        err = wc_RNG_GenerateBlock(rng, (byte*)r->dp, (word32)len);
        if (err != 0) {
            err = MP_VAL;
            break;
        }

        /* Set top bits to ensure bit length required is generated.
         * Also set second top to help ensure product of two primes is
         * going to be twice the number of bits of each.
         */
#ifdef LITTLE_ENDIAN_ORDER
        ((byte*)r->dp)[len-1] |= 0x80 | 0x40;
#else
        ((byte*)(r->dp + r->used - 1))[0] |= 0x80 | 0x40;
#endif /* LITTLE_ENDIAN_ORDER */

#ifdef BIG_ENDIAN_ORDER
        /* Bytes were put into wrong place when less than full digit. */
        if (bits != 0) {
            r->dp[r->used - 1] >>= SP_WORD_SIZE - bits;
        }
#endif /* BIG_ENDIAN_ORDER */
#ifdef WOLFSSL_SP_MATH_ALL
        /* Mask top digit when less than a digit requested. */
        if (bits > 0) {
            r->dp[r->used - 1] &= ((sp_int_digit)1 << bits) - 1;
        }
#endif /* WOLFSSL_SP_MATH_ALL */
        /* Set mandatory low bits
         *  - bottom bit to make odd.
         *  - For BBS, second lowest too to make Blum integer (3 mod 4).
         */
        r->dp[0] |= low_bits;

        /* Running Miller-Rabin up to 3 times gives us a 2^{-80} chance
         * of a 1024-bit candidate being a false positive, when it is our
         * prime candidate. (Note 4.49 of Handbook of Applied Cryptography.)
         */
        err = sp_prime_is_prime_ex(r, WOLFSSL_SP_MILLER_RABIN_CNT, &isPrime,
            rng);
    }

    return err;
}
  17562. #endif /* WOLFSSL_KEY_GEN && (!NO_DH || !NO_DSA) && !WC_NO_RNG */
  17563. #ifdef WOLFSSL_SP_PRIME_GEN
  17564. /* Miller-Rabin test of "a" to the base of "b" as described in
  17565. * HAC pp. 139 Algorithm 4.24
  17566. *
  17567. * Sets result to 0 if definitely composite or 1 if probably prime.
  17568. * Randomly the chance of error is no more than 1/4 and often
  17569. * very much lower.
  17570. *
  17571. * a is assumed to be odd.
  17572. *
  17573. * @param [in] a SP integer to check.
  17574. * @param [in] b SP integer that is a small prime.
  17575. * @param [out] result MP_YES when number is likey prime.
  17576. * MP_NO otherwise.
  17577. * @param [in] n1 SP integer temporary.
  17578. * @param [in] r SP integer temporary.
  17579. *
  17580. * @return MP_OKAY on success.
  17581. * @return MP_MEM when dynamic memory allocation fails.
  17582. */
static int sp_prime_miller_rabin(const sp_int* a, sp_int* b, int* result,
    sp_int* n1, sp_int* r)
{
    int err = MP_OKAY;
    int s = 0;
    /* y aliases b: b is consumed as the base of the exponentiation and
     * its storage then holds the running value y. b is clobbered. */
    sp_int* y = b;

    /* Assume not prime. */
    *result = MP_NO;

    /* Ensure small prime is 2 or more. */
    if (sp_cmp_d(b, 1) != MP_GT) {
        err = MP_VAL;
    }
    if (err == MP_OKAY) {
        /* n1 = a - 1 (a is assumed odd: lowest digit >= 1, no borrow.) */
        (void)sp_copy(a, n1);
        n1->dp[0]--;

        /* Set 2**s * r = n1 */
        /* Count the number of least significant bits which are zero. */
        s = sp_cnt_lsb(n1);
        /* Divide n - 1 by 2**s into r. */
        (void)sp_rshb(n1, s, r);

        /* Compute y = b**r mod a */
        err = sp_exptmod(b, r, a, y);
    }
    if (err == MP_OKAY) {
        /* Assume probably prime until shown otherwise. */
        *result = MP_YES;

        /* If y != 1 and y != n1 do */
        if ((sp_cmp_d(y, 1) != MP_EQ) && (_sp_cmp(y, n1) != MP_EQ)) {
            int j = 1;
            /* While j <= s-1 and y != n1 */
            while ((j <= (s - 1)) && (_sp_cmp(y, n1) != MP_EQ)) {
                /* Square for bit shifted down. */
                err = sp_sqrmod(y, a, y);
                if (err != MP_OKAY) {
                    break;
                }
                /* If y == 1 then composite (non-trivial sqrt of 1 found). */
                if (sp_cmp_d(y, 1) == MP_EQ) {
                    *result = MP_NO;
                    break;
                }
                ++j;
            }
            /* If y != n1 then composite. */
            if ((*result == MP_YES) && (_sp_cmp(y, n1) != MP_EQ)) {
                *result = MP_NO;
            }
        }
    }

    return err;
}
#if SP_WORD_SIZE == 8
/* Number of pre-computed primes. First n primes - fitting in a digit. */
#define SP_PRIME_SIZE      54

/* The first 54 primes - each value fits in an 8-bit digit. */
static const sp_int_digit sp_primes[SP_PRIME_SIZE] = {
    0x02, 0x03, 0x05, 0x07, 0x0B, 0x0D, 0x11, 0x13,
    0x17, 0x1D, 0x1F, 0x25, 0x29, 0x2B, 0x2F, 0x35,
    0x3B, 0x3D, 0x43, 0x47, 0x49, 0x4F, 0x53, 0x59,
    0x61, 0x65, 0x67, 0x6B, 0x6D, 0x71, 0x7F, 0x83,
    0x89, 0x8B, 0x95, 0x97, 0x9D, 0xA3, 0xA7, 0xAD,
    0xB3, 0xB5, 0xBF, 0xC1, 0xC5, 0xC7, 0xD3, 0xDF,
    0xE3, 0xE5, 0xE9, 0xEF, 0xF1, 0xFB
};
#else
/* Number of pre-computed primes. First n primes. */
#define SP_PRIME_SIZE      256

/* The first 256 primes (largest is 0x0653 = 1619). */
static const sp_uint16 sp_primes[SP_PRIME_SIZE] = {
    0x0002, 0x0003, 0x0005, 0x0007, 0x000B, 0x000D, 0x0011, 0x0013,
    0x0017, 0x001D, 0x001F, 0x0025, 0x0029, 0x002B, 0x002F, 0x0035,
    0x003B, 0x003D, 0x0043, 0x0047, 0x0049, 0x004F, 0x0053, 0x0059,
    0x0061, 0x0065, 0x0067, 0x006B, 0x006D, 0x0071, 0x007F, 0x0083,
    0x0089, 0x008B, 0x0095, 0x0097, 0x009D, 0x00A3, 0x00A7, 0x00AD,
    0x00B3, 0x00B5, 0x00BF, 0x00C1, 0x00C5, 0x00C7, 0x00D3, 0x00DF,
    0x00E3, 0x00E5, 0x00E9, 0x00EF, 0x00F1, 0x00FB, 0x0101, 0x0107,
    0x010D, 0x010F, 0x0115, 0x0119, 0x011B, 0x0125, 0x0133, 0x0137,
    0x0139, 0x013D, 0x014B, 0x0151, 0x015B, 0x015D, 0x0161, 0x0167,
    0x016F, 0x0175, 0x017B, 0x017F, 0x0185, 0x018D, 0x0191, 0x0199,
    0x01A3, 0x01A5, 0x01AF, 0x01B1, 0x01B7, 0x01BB, 0x01C1, 0x01C9,
    0x01CD, 0x01CF, 0x01D3, 0x01DF, 0x01E7, 0x01EB, 0x01F3, 0x01F7,
    0x01FD, 0x0209, 0x020B, 0x021D, 0x0223, 0x022D, 0x0233, 0x0239,
    0x023B, 0x0241, 0x024B, 0x0251, 0x0257, 0x0259, 0x025F, 0x0265,
    0x0269, 0x026B, 0x0277, 0x0281, 0x0283, 0x0287, 0x028D, 0x0293,
    0x0295, 0x02A1, 0x02A5, 0x02AB, 0x02B3, 0x02BD, 0x02C5, 0x02CF,
    0x02D7, 0x02DD, 0x02E3, 0x02E7, 0x02EF, 0x02F5, 0x02F9, 0x0301,
    0x0305, 0x0313, 0x031D, 0x0329, 0x032B, 0x0335, 0x0337, 0x033B,
    0x033D, 0x0347, 0x0355, 0x0359, 0x035B, 0x035F, 0x036D, 0x0371,
    0x0373, 0x0377, 0x038B, 0x038F, 0x0397, 0x03A1, 0x03A9, 0x03AD,
    0x03B3, 0x03B9, 0x03C7, 0x03CB, 0x03D1, 0x03D7, 0x03DF, 0x03E5,
    0x03F1, 0x03F5, 0x03FB, 0x03FD, 0x0407, 0x0409, 0x040F, 0x0419,
    0x041B, 0x0425, 0x0427, 0x042D, 0x043F, 0x0443, 0x0445, 0x0449,
    0x044F, 0x0455, 0x045D, 0x0463, 0x0469, 0x047F, 0x0481, 0x048B,
    0x0493, 0x049D, 0x04A3, 0x04A9, 0x04B1, 0x04BD, 0x04C1, 0x04C7,
    0x04CD, 0x04CF, 0x04D5, 0x04E1, 0x04EB, 0x04FD, 0x04FF, 0x0503,
    0x0509, 0x050B, 0x0511, 0x0515, 0x0517, 0x051B, 0x0527, 0x0529,
    0x052F, 0x0551, 0x0557, 0x055D, 0x0565, 0x0577, 0x0581, 0x058F,
    0x0593, 0x0595, 0x0599, 0x059F, 0x05A7, 0x05AB, 0x05AD, 0x05B3,
    0x05BF, 0x05C9, 0x05CB, 0x05CF, 0x05D1, 0x05D5, 0x05DB, 0x05E7,
    0x05F3, 0x05FB, 0x0607, 0x060D, 0x0611, 0x0617, 0x061F, 0x0623,
    0x062B, 0x062F, 0x063D, 0x0641, 0x0647, 0x0649, 0x064D, 0x0653
};
#endif
  17686. /* Compare the first n primes with a.
  17687. *
  17688. * @param [in] a Number to check.
  17689. * @param [out] result Whether number was found to be prime.
  17690. * @return 0 when no small prime matches.
  17691. * @return 1 when small prime matches.
  17692. */
  17693. static WC_INLINE int sp_cmp_primes(const sp_int* a, int* result)
  17694. {
  17695. int i;
  17696. int haveRes = 0;
  17697. *result = MP_NO;
  17698. /* Check one digit a against primes table. */
  17699. for (i = 0; i < SP_PRIME_SIZE; i++) {
  17700. if (sp_cmp_d(a, sp_primes[i]) == MP_EQ) {
  17701. *result = MP_YES;
  17702. haveRes = 1;
  17703. break;
  17704. }
  17705. }
  17706. return haveRes;
  17707. }
  17708. /* Using composites is only faster when using 64-bit values. */
  17709. #if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
  17710. /* Number of composites. */
  17711. #define SP_COMP_CNT 38
  17712. /* Products of small primes that fit into 64-bits. */
  17713. static sp_int_digit sp_comp[SP_COMP_CNT] = {
  17714. 0x088886ffdb344692, 0x34091fa96ffdf47b, 0x3c47d8d728a77ebb,
  17715. 0x077ab7da9d709ea9, 0x310df3e7bd4bc897, 0xe657d7a1fd5161d1,
  17716. 0x02ad3dbe0cca85ff, 0x0787f9a02c3388a7, 0x1113c5cc6d101657,
  17717. 0x2456c94f936bdb15, 0x4236a30b85ffe139, 0x805437b38eada69d,
  17718. 0x00723e97bddcd2af, 0x00a5a792ee239667, 0x00e451352ebca269,
  17719. 0x013a7955f14b7805, 0x01d37cbd653b06ff, 0x0288fe4eca4d7cdf,
  17720. 0x039fddb60d3af63d, 0x04cd73f19080fb03, 0x0639c390b9313f05,
  17721. 0x08a1c420d25d388f, 0x0b4b5322977db499, 0x0e94c170a802ee29,
  17722. 0x11f6a0e8356100df, 0x166c8898f7b3d683, 0x1babda0a0afd724b,
  17723. 0x2471b07c44024abf, 0x2d866dbc2558ad71, 0x3891410d45fb47df,
  17724. 0x425d5866b049e263, 0x51f767298e2cf13b, 0x6d9f9ece5fc74f13,
  17725. 0x7f5ffdb0f56ee64d, 0x943740d46a1bc71f, 0xaf2d7ca25cec848f,
  17726. 0xcec010484e4ad877, 0xef972c3cfafbcd25
  17727. };
  17728. /* Index of next prime after those used to create composite. */
  17729. static int sp_comp_idx[SP_COMP_CNT] = {
  17730. 15, 25, 34, 42, 50, 58, 65, 72, 79, 86, 93, 100, 106, 112, 118,
  17731. 124, 130, 136, 142, 148, 154, 160, 166, 172, 178, 184, 190, 196, 202, 208,
  17732. 214, 220, 226, 232, 238, 244, 250, 256
  17733. };
  17734. #endif
  17735. /* Determines whether any of the first n small primes divide a evenly.
  17736. *
  17737. * @param [in] a Number to check.
  17738. * @param [in, out] haveRes Boolean indicating a no prime result found.
  17739. * @param [in, out] result Whether a is known to be prime.
  17740. * @return MP_OKAY on success.
  17741. * @return Negative on failure.
  17742. */
static WC_INLINE int sp_div_primes(const sp_int* a, int* haveRes, int* result)
{
    int i;
#if !defined(WOLFSSL_SP_SMALL) && (SP_WORD_SIZE == 64)
    int j;
#endif
    sp_int_digit d;
    int err = MP_OKAY;

#if defined(WOLFSSL_SP_SMALL) || (SP_WORD_SIZE < 64)
    /* Do trial division of a with all known small primes. */
    for (i = 0; i < SP_PRIME_SIZE; i++) {
        /* Small prime divides a when remainder is 0. */
        err = sp_mod_d(a, (sp_int_digit)sp_primes[i], &d);
        /* Composite found or mod failed - either way stop and report. */
        if ((err != MP_OKAY) || (d == 0)) {
            *result = MP_NO;
            *haveRes = 1;
            break;
        }
    }
#else
    /* 64-bit fast path: reduce a modulo a product of several small primes
     * once, then trial-divide the single-word remainder. */
    /* Start with first prime in composite. */
    i = 0;
    for (j = 0; (!(*haveRes)) && (j < SP_COMP_CNT); j++) {
        /* Reduce a down to a single word. */
        err = sp_mod_d(a, sp_comp[j], &d);
        /* d == 0 means the whole composite divides a - composite number. */
        if ((err != MP_OKAY) || (d == 0)) {
            *result = MP_NO;
            *haveRes = 1;
            break;
        }
        /* Do trial division of d with small primes that make up composite.
         * i carries over between composites - each prime is checked once. */
        for (; i < sp_comp_idx[j]; i++) {
            /* Small prime divides a when remainder is 0. */
            if (d % sp_primes[i] == 0) {
                *result = MP_NO;
                *haveRes = 1;
                break;
            }
        }
    }
#endif

    return err;
}
  17786. /* Check whether a is prime by checking t iterations of Miller-Rabin.
  17787. *
  17788. * @param [in] a SP integer to check.
  17789. * @param [in] trials Number of trials of Miller-Rabin test to perform.
  17790. * @param [out] result MP_YES when number is prime.
  17791. * MP_NO otherwise.
  17792. *
  17793. * @return MP_OKAY on success.
  17794. * @return MP_MEM when dynamic memory allocation fails.
  17795. */
  17796. static int _sp_prime_trials(const sp_int* a, int trials, int* result)
  17797. {
  17798. int err = MP_OKAY;
  17799. int i;
  17800. sp_int* n1;
  17801. sp_int* r;
  17802. DECL_SP_INT_ARRAY(t, a->used + 1, 2);
  17803. DECL_SP_INT(b, a->used * 2 + 1);
  17804. ALLOC_SP_INT_ARRAY(t, a->used + 1, 2, err, NULL);
  17805. /* Allocate number that will hold modular exponentiation result. */
  17806. ALLOC_SP_INT(b, a->used * 2 + 1, err, NULL);
  17807. if (err == MP_OKAY) {
  17808. n1 = t[0];
  17809. r = t[1];
  17810. _sp_init_size(n1, a->used + 1);
  17811. _sp_init_size(r, a->used + 1);
  17812. _sp_init_size(b, a->used * 2 + 1);
  17813. /* Do requested number of trials of Miller-Rabin test. */
  17814. for (i = 0; i < trials; i++) {
  17815. /* Miller-Rabin test with known small prime. */
  17816. _sp_set(b, sp_primes[i]);
  17817. err = sp_prime_miller_rabin(a, b, result, n1, r);
  17818. if ((err != MP_OKAY) || (*result == MP_NO)) {
  17819. break;
  17820. }
  17821. }
  17822. /* Clear temporary values. */
  17823. sp_clear(n1);
  17824. sp_clear(r);
  17825. sp_clear(b);
  17826. }
  17827. /* Free allocated temporary. */
  17828. FREE_SP_INT(b, NULL);
  17829. FREE_SP_INT_ARRAY(t, NULL);
  17830. return err;
  17831. }
  17832. /* Check whether a is prime.
  17833. * Checks against a number of small primes and does t iterations of
  17834. * Miller-Rabin.
  17835. *
  17836. * @param [in] a SP integer to check.
  17837. * @param [in] trials Number of trials of Miller-Rabin test to perform.
  17838. * @param [out] result MP_YES when number is prime.
  17839. * MP_NO otherwise.
  17840. *
  17841. * @return MP_OKAY on success.
  17842. * @return MP_VAL when a or result is NULL, or trials is out of range.
  17843. * @return MP_MEM when dynamic memory allocation fails.
  17844. */
  17845. int sp_prime_is_prime(const sp_int* a, int trials, int* result)
  17846. {
  17847. int err = MP_OKAY;
  17848. int haveRes = 0;
  17849. /* Validate parameters. */
  17850. if ((a == NULL) || (result == NULL)) {
  17851. if (result != NULL) {
  17852. *result = MP_NO;
  17853. }
  17854. err = MP_VAL;
  17855. }
  17856. else if (a->used * 2 >= SP_INT_DIGITS) {
  17857. err = MP_VAL;
  17858. }
  17859. /* Check validity of Miller-Rabin iterations count.
  17860. * Must do at least one and need a unique pre-computed prime for each
  17861. * iteration.
  17862. */
  17863. if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
  17864. *result = MP_NO;
  17865. err = MP_VAL;
  17866. }
  17867. /* Short-cut, 1 is not prime. */
  17868. if ((err == MP_OKAY) && sp_isone(a)) {
  17869. *result = MP_NO;
  17870. haveRes = 1;
  17871. }
  17872. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  17873. /* Check against known small primes when a has 1 digit. */
  17874. if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
  17875. (a->dp[0] <= sp_primes[SP_PRIME_SIZE - 1])) {
  17876. haveRes = sp_cmp_primes(a, result);
  17877. }
  17878. /* Check all small primes for even divisibility. */
  17879. if ((err == MP_OKAY) && (!haveRes)) {
  17880. err = sp_div_primes(a, &haveRes, result);
  17881. }
  17882. /* Check a number of iterations of Miller-Rabin with small primes. */
  17883. if ((err == MP_OKAY) && (!haveRes)) {
  17884. err = _sp_prime_trials(a, trials, result);
  17885. }
  17886. RESTORE_VECTOR_REGISTERS();
  17887. return err;
  17888. }
  17889. #ifndef WC_NO_RNG
/* Check whether a is prime by doing t iterations of Miller-Rabin.
 *
 * t random numbers should give a (1/4)^t chance of a false prime.
 *
 * @param [in]  a       SP integer to check.
 * @param [in]  trials  Number of iterations of Miller-Rabin test to perform.
 * @param [out] result  MP_YES when number is prime.
 *                      MP_NO otherwise.
 * @param [in]  rng     Random number generator for Miller-Rabin testing.
 *
 * @return MP_OKAY on success.
 * @return MP_VAL when a, result or rng is NULL.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static int _sp_prime_random_trials(const sp_int* a, int trials, int* result,
    WC_RNG* rng)
{
    int err = MP_OKAY;
    /* Total number of bits in a - bounds the size of random candidates. */
    int bits = sp_count_bits(a);
    /* Number of random bytes needed to cover all bits of a. */
    word32 baseSz = ((word32)bits + 7) / 8;
    DECL_SP_INT_ARRAY(ds, a->used + 1, 2);
    DECL_SP_INT_ARRAY(d, a->used * 2 + 1, 2);

    ALLOC_SP_INT_ARRAY(ds, a->used + 1, 2, err, NULL);
    ALLOC_SP_INT_ARRAY(d, a->used * 2 + 1, 2, err, NULL);
    if (err == MP_OKAY) {
        sp_int* c = ds[0];   /* c = a - 2: exclusive upper bound for bases. */
        sp_int* n1 = ds[1];  /* Scratch for Miller-Rabin. */
        sp_int* b = d[0];    /* Random Miller-Rabin base. */
        sp_int* r = d[1];    /* Scratch for modular exponentiation result. */

        _sp_init_size(c , a->used + 1);
        _sp_init_size(n1, a->used + 1);
        _sp_init_size(b , a->used * 2 + 1);
        _sp_init_size(r , a->used * 2 + 1);

        /* c = a - 2. Random base b must satisfy 2 < b < a - 2. */
        _sp_sub_d(a, 2, c);

        /* Number of bits used in the top digit of a (0 means a full digit). */
        bits &= SP_WORD_MASK;

        /* Keep trying random numbers until all trials complete. */
        while (trials > 0) {
            /* Generate random trial number: fill b's digits with baseSz
             * random bytes. */
            err = wc_RNG_GenerateBlock(rng, (byte*)b->dp, baseSz);
            if (err != MP_OKAY) {
                break;
            }
            b->used = a->used;

    #ifdef BIG_ENDIAN_ORDER
            /* Fix top digit if fewer bytes than a full digit generated. */
            if (((baseSz * 8) & SP_WORD_MASK) != 0) {
                b->dp[b->used-1] >>=
                    SP_WORD_SIZE - ((baseSz * 8) & SP_WORD_MASK);
            }
    #endif /* BIG_ENDIAN_ORDER */

            /* Mask the top word so b has no more bits than a. */
            if (bits > 0) {
                b->dp[b->used - 1] &= ((sp_int_digit)1 << bits) - 1;
                sp_clamp(b);
            }

            /* Can't use random value if it is: 0, 1, a-2, a-1, >= a.
             * Retry without consuming a trial. */
            if ((sp_cmp_d(b, 2) != MP_GT) || (_sp_cmp(b, c) != MP_LT)) {
                continue;
            }

            /* Perform Miller-Rabin test with random base b. */
            err = sp_prime_miller_rabin(a, b, result, n1, r);
            if ((err != MP_OKAY) || (*result == MP_NO)) {
                break;
            }

            /* Trial complete. */
            trials--;
        }

        /* Zeroize temporary values used when generating private prime. */
        sp_forcezero(n1);
        sp_forcezero(r);
        sp_forcezero(b);
        sp_forcezero(c);
    }

    FREE_SP_INT_ARRAY(d, NULL);
    FREE_SP_INT_ARRAY(ds, NULL);
    return err;
}
  17967. #endif /*!WC_NO_RNG */
  17968. /* Check whether a is prime.
  17969. * Checks against a number of small primes and does t iterations of
  17970. * Miller-Rabin.
  17971. *
  17972. * @param [in] a SP integer to check.
  17973. * @param [in] trials Number of iterations of Miller-Rabin test to perform.
  17974. * @param [out] result MP_YES when number is prime.
  17975. * MP_NO otherwise.
  17976. * @param [in] rng Random number generator for Miller-Rabin testing.
  17977. *
  17978. * @return MP_OKAY on success.
  17979. * @return MP_VAL when a, result or rng is NULL.
  17980. * @return MP_MEM when dynamic memory allocation fails.
  17981. */
  17982. int sp_prime_is_prime_ex(const sp_int* a, int trials, int* result, WC_RNG* rng)
  17983. {
  17984. int err = MP_OKAY;
  17985. int ret = MP_YES;
  17986. int haveRes = 0;
  17987. if ((a == NULL) || (result == NULL) || (rng == NULL)) {
  17988. err = MP_VAL;
  17989. }
  17990. #ifndef WC_NO_RNG
  17991. if ((err == MP_OKAY) && (a->used * 2 >= SP_INT_DIGITS)) {
  17992. err = MP_VAL;
  17993. }
  17994. #endif
  17995. #ifdef WOLFSSL_SP_INT_NEGATIVE
  17996. if ((err == MP_OKAY) && (a->sign == MP_NEG)) {
  17997. err = MP_VAL;
  17998. }
  17999. #endif
  18000. /* Ensure trials is valid. Maximum based on number of small primes
  18001. * available. */
  18002. if ((err == MP_OKAY) && ((trials <= 0) || (trials > SP_PRIME_SIZE))) {
  18003. err = MP_VAL;
  18004. }
  18005. if ((err == MP_OKAY) && sp_isone(a)) {
  18006. ret = MP_NO;
  18007. haveRes = 1;
  18008. }
  18009. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  18010. /* Check against known small primes when a has 1 digit. */
  18011. if ((err == MP_OKAY) && (!haveRes) && (a->used == 1) &&
  18012. (a->dp[0] <= (sp_int_digit)sp_primes[SP_PRIME_SIZE - 1])) {
  18013. haveRes = sp_cmp_primes(a, &ret);
  18014. }
  18015. /* Check all small primes for even divisibility. */
  18016. if ((err == MP_OKAY) && (!haveRes)) {
  18017. err = sp_div_primes(a, &haveRes, &ret);
  18018. }
  18019. #ifndef WC_NO_RNG
  18020. /* Check a number of iterations of Miller-Rabin with random large values. */
  18021. if ((err == MP_OKAY) && (!haveRes)) {
  18022. err = _sp_prime_random_trials(a, trials, &ret, rng);
  18023. }
  18024. #else
  18025. (void)trials;
  18026. #endif /* !WC_NO_RNG */
  18027. if (result != NULL) {
  18028. *result = ret;
  18029. }
  18030. RESTORE_VECTOR_REGISTERS();
  18031. return err;
  18032. }
  18033. #endif /* WOLFSSL_SP_PRIME_GEN */
  18034. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN)
/* Calculates the Greatest Common Denominator (GCD) of a and b into r.
 *
 * Find the largest number that divides both a and b without remainder.
 * r <= a, r <= b, a % r == 0, b % r == 0
 *
 * a and b are positive integers.
 *
 * Euclidean Algorithm:
 *  1. If a > b then a = b, b = a
 *  2. u = a
 *  3. v = b % a
 *  4. While v != 0
 *   4.1. t = u % v
 *   4.2. u <= v, v <= t, t <= u
 *  5. r = u
 *
 * @param [in]  a  SP integer of first operand.
 * @param [in]  b  SP integer of second operand.
 * @param [out] r  SP integer to hold result.
 *
 * @return MP_OKAY on success.
 * @return MP_MEM when dynamic memory allocation fails.
 */
static WC_INLINE int _sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
{
    int err = MP_OKAY;
    sp_int* u = NULL;
    sp_int* v = NULL;
    sp_int* t = NULL;
    /* Used for swapping sp_ints. */
    sp_int* s;
    /* Determine maximum digit length numbers will reach. */
    unsigned int used = (a->used >= b->used) ? a->used + 1 : b->used + 1;
    DECL_SP_INT_ARRAY(d, used, 3);

    SAVE_VECTOR_REGISTERS(err = _svr_ret;);

    ALLOC_SP_INT_ARRAY(d, used, 3, err, NULL);
    if (err == MP_OKAY) {
        u = d[0];
        v = d[1];
        t = d[2];
        _sp_init_size(u, used);
        _sp_init_size(v, used);
        _sp_init_size(t, used);

        /* 1. If a > b then a = b, b = a.
         * Make a <= b so the first reduction below is b mod a.
         */
        if (_sp_cmp(a, b) == MP_GT) {
            const sp_int* tmp;
            tmp = a;
            a = b;
            b = tmp;
        }

        /* 2. u = a */
        _sp_copy(a, u);
        /* 3. v = b mod a. Use the cheaper digit modulus when a fits in one
         * digit. */
        if (a->used == 1) {
            err = sp_mod_d(b, a->dp[0], &v->dp[0]);
            /* used is 1 unless the remainder is zero. */
            v->used = (v->dp[0] != 0);
        }
        else {
            err = sp_mod(b, a, v);
        }
    }

    /* 4. While v != 0 */
    /* Keep reducing larger by smaller until smaller is 0 or u and v both one
     * digit.
     */
    while ((err == MP_OKAY) && (!sp_iszero(v)) && (u->used > 1)) {
        /* u' = v, v' = u mod v */
        /* 4.1 t = u mod v */
        if (v->used == 1) {
            err = sp_mod_d(u, v->dp[0], &t->dp[0]);
            t->used = (t->dp[0] != 0);
        }
        else {
            err = sp_mod(u, v, t);
        }
        /* 4.2. u <= v, v <= t, t <= u - rotate pointers, no copying. */
        s = u; u = v; v = t; t = s;
    }
    /* Only one digit remaining in u and v - finish with native modulus. */
    while ((err == MP_OKAY) && (!sp_iszero(v))) {
        /* u' = v, v' = u mod v */
        /* 4.1 t = u mod v */
        t->dp[0] = u->dp[0] % v->dp[0];
        t->used = (t->dp[0] != 0);
        /* 4.2. u <= v, v <= t, t <= u */
        s = u; u = v; v = t; t = s;
    }
    if (err == MP_OKAY) {
        /* 5. r = u */
        _sp_copy(u, r);
    }

    FREE_SP_INT_ARRAY(d, NULL);

    RESTORE_VECTOR_REGISTERS();

    return err;
}
  18132. /* Calculates the Greatest Common Denominator (GCD) of a and b into r.
  18133. *
  18134. * Find the largest number that divides both a and b without remainder.
  18135. * r <= a, r <= b, a % r == 0, b % r == 0
  18136. *
  18137. * a and b are positive integers.
  18138. *
  18139. * @param [in] a SP integer of first operand.
  18140. * @param [in] b SP integer of second operand.
  18141. * @param [out] r SP integer to hold result.
  18142. *
  18143. * @return MP_OKAY on success.
  18144. * @return MP_VAL when a, b or r is NULL or too large.
  18145. * @return MP_MEM when dynamic memory allocation fails.
  18146. */
  18147. int sp_gcd(const sp_int* a, const sp_int* b, sp_int* r)
  18148. {
  18149. int err = MP_OKAY;
  18150. /* Validate parameters. */
  18151. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  18152. err = MP_VAL;
  18153. }
  18154. /* Check that we have space in numbers to do work. */
  18155. else if ((a->used >= SP_INT_DIGITS) || (b->used >= SP_INT_DIGITS)) {
  18156. err = MP_VAL;
  18157. }
  18158. /* Check that r is large enough to hold maximum sized result. */
  18159. else if (((a->used <= b->used) && (r->size < a->used)) ||
  18160. ((b->used < a->used) && (r->size < b->used))) {
  18161. err = MP_VAL;
  18162. }
  18163. #ifdef WOLFSSL_SP_INT_NEGATIVE
  18164. /* Algorithm doesn't work with negative numbers. */
  18165. else if ((a->sign == MP_NEG) || (b->sign == MP_NEG)) {
  18166. err = MP_VAL;
  18167. }
  18168. #endif
  18169. else if (sp_iszero(a)) {
  18170. /* GCD of 0 and 0 is undefined - all integers divide 0. */
  18171. if (sp_iszero(b)) {
  18172. err = MP_VAL;
  18173. }
  18174. else {
  18175. /* GCD of 0 and b is b - b divides 0. */
  18176. err = sp_copy(b, r);
  18177. }
  18178. }
  18179. else if (sp_iszero(b)) {
  18180. /* GCD of 0 and a is a - a divides 0. */
  18181. err = sp_copy(a, r);
  18182. }
  18183. else {
  18184. /* Calculate GCD. */
  18185. err = _sp_gcd(a, b, r);
  18186. }
  18187. return err;
  18188. }
#endif /* !NO_RSA && WOLFSSL_KEY_GEN */
  18190. #if !defined(NO_RSA) && defined(WOLFSSL_KEY_GEN) && \
  18191. (!defined(WC_RSA_BLINDING) || defined(HAVE_FIPS) || defined(HAVE_SELFTEST))
  18192. /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
  18193. * Smallest number divisible by both numbers.
  18194. *
  18195. * a and b are positive integers.
  18196. *
  18197. * lcm(a, b) = (a / gcd(a, b)) * b
  18198. * Divide the common divisor from a and multiply by b.
  18199. *
  18200. * Algorithm:
  18201. * 1. t0 = gcd(a, b)
  18202. * 2. If a > b then
  18203. * 2.1. t1 = a / t0
  18204. * 2.2. r = b * t1
  18205. * 3. Else
  18206. * 3.1. t1 = b / t0
  18207. * 3.2. r = a * t1
  18208. *
  18209. * @param [in] a SP integer of first operand.
  18210. * @param [in] b SP integer of second operand.
  18211. * @param [out] r SP integer to hold result.
  18212. *
  18213. * @return MP_OKAY on success.
  18214. * @return MP_MEM when dynamic memory allocation fails.
  18215. */
  18216. static int _sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
  18217. {
  18218. int err = MP_OKAY;
  18219. /* Determine maximum digit length numbers will reach. */
  18220. unsigned int used = ((a->used >= b->used) ? a->used + 1: b->used + 1);
  18221. DECL_SP_INT_ARRAY(t, used, 2);
  18222. ALLOC_SP_INT_ARRAY(t, used, 2, err, NULL);
  18223. if (err == MP_OKAY) {
  18224. _sp_init_size(t[0], used);
  18225. _sp_init_size(t[1], used);
  18226. SAVE_VECTOR_REGISTERS(err = _svr_ret;);
  18227. if (err == MP_OKAY) {
  18228. /* 1. t0 = gcd(a, b) */
  18229. err = sp_gcd(a, b, t[0]);
  18230. }
  18231. if (err == MP_OKAY) {
  18232. /* Divide the greater by the common divisor and multiply by other
  18233. * to operate on the smallest length numbers.
  18234. */
  18235. /* 2. If a > b then */
  18236. if (_sp_cmp_abs(a, b) == MP_GT) {
  18237. /* 2.1. t1 = a / t0 */
  18238. err = sp_div(a, t[0], t[1], NULL);
  18239. if (err == MP_OKAY) {
  18240. /* 2.2. r = b * t1 */
  18241. err = sp_mul(b, t[1], r);
  18242. }
  18243. }
  18244. /* 3. Else */
  18245. else {
  18246. /* 3.1. t1 = b / t0 */
  18247. err = sp_div(b, t[0], t[1], NULL);
  18248. if (err == MP_OKAY) {
  18249. /* 3.2. r = a * t1 */
  18250. err = sp_mul(a, t[1], r);
  18251. }
  18252. }
  18253. }
  18254. RESTORE_VECTOR_REGISTERS();
  18255. }
  18256. FREE_SP_INT_ARRAY(t, NULL);
  18257. return err;
  18258. }
  18259. /* Calculates the Lowest Common Multiple (LCM) of a and b and stores in r.
  18260. * Smallest number divisible by both numbers.
  18261. *
  18262. * a and b are positive integers.
  18263. *
  18264. * @param [in] a SP integer of first operand.
  18265. * @param [in] b SP integer of second operand.
  18266. * @param [out] r SP integer to hold result.
  18267. *
  18268. * @return MP_OKAY on success.
  18269. * @return MP_VAL when a, b or r is NULL; or a or b is zero.
  18270. * @return MP_MEM when dynamic memory allocation fails.
  18271. */
  18272. int sp_lcm(const sp_int* a, const sp_int* b, sp_int* r)
  18273. {
  18274. int err = MP_OKAY;
  18275. /* Validate parameters. */
  18276. if ((a == NULL) || (b == NULL) || (r == NULL)) {
  18277. err = MP_VAL;
  18278. }
  18279. #ifdef WOLFSSL_SP_INT_NEGATIVE
  18280. /* Ensure a and b are positive. */
  18281. else if ((a->sign == MP_NEG) || (b->sign >= MP_NEG)) {
  18282. err = MP_VAL;
  18283. }
  18284. #endif
  18285. /* Ensure r has space for maximumal result. */
  18286. else if (r->size < a->used + b->used) {
  18287. err = MP_VAL;
  18288. }
  18289. /* LCM of 0 and any number is undefined as 0 is not in the set of values
  18290. * being used.
  18291. */
  18292. if ((err == MP_OKAY) && (mp_iszero(a) || mp_iszero(b))) {
  18293. err = MP_VAL;
  18294. }
  18295. if (err == MP_OKAY) {
  18296. /* Do operation. */
  18297. err = _sp_lcm(a, b, r);
  18298. }
  18299. return err;
  18300. }
#endif /* !NO_RSA && WOLFSSL_KEY_GEN &&
        * (!WC_RSA_BLINDING || HAVE_FIPS || HAVE_SELFTEST) */
/* Returns the run time settings.
 *
 * Lets an application compare the configuration it was compiled with against
 * the configuration the library was built with (value of CTC_SETTINGS).
 *
 * @return Settings value.
 */
word32 CheckRunTimeSettings(void)
{
    return CTC_SETTINGS;
}
/* Returns the fast math settings.
 *
 * Lets an application verify it was built with the same digit size as the
 * library.
 *
 * @return Setting - number of bits in a digit (SP_WORD_SIZE).
 */
word32 CheckRunTimeFastMath(void)
{
    return SP_WORD_SIZE;
}
  18318. #ifdef WOLFSSL_CHECK_MEM_ZERO
/* Add an MP to check.
 *
 * Registers the sp_int's full digit buffer (sp->size digits, not just
 * sp->used) with the memory-zeroization checker.
 *
 * @param [in] name  Name of address to check.
 * @param [in] sp    sp_int that needs to be checked.
 */
void sp_memzero_add(const char* name, sp_int* sp)
{
    wc_MemZero_Add(name, sp->dp, sp->size * sizeof(sp_digit));
}
/* Check the memory in the data pointer for memory that must be zero.
 *
 * Checks the sp_int's full digit buffer (sp->size digits) against the
 * zeroization tracker.
 *
 * @param [in] sp  sp_int that needs to be checked.
 */
void sp_memzero_check(sp_int* sp)
{
    wc_MemZero_Check(sp->dp, sp->size * sizeof(sp_digit));
}
  18336. #endif /* WOLFSSL_CHECK_MEM_ZERO */
  18337. #if (!defined(WOLFSSL_SMALL_STACK) && !defined(SP_ALLOC)) || \
  18338. defined(WOLFSSL_SP_NO_MALLOC)
  18339. #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \
  18340. !defined(WOLFSSL_SP_NO_DYN_STACK)
  18341. #pragma GCC diagnostic pop
  18342. #endif
  18343. #endif
  18344. #endif /* WOLFSSL_SP_MATH || WOLFSSL_SP_MATH_ALL */