aes_gcm_asm.asm 483 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814481548164817481848194820482148224823482448254826482748284829483048314832483348344835483648374838483948404841484248434844484548464847484848494850485148524853485448554856485748584859486048614862486348644865486648674868486948704871487248734874487548764877487848794880488148824883488448854886488748884889489048914892489348944895489648974898489949004901490249034904490549064907490849094910491149124913491449154916491749184919492049214922492349244925492649274928492949304931493249334934493549364937493849394940494149424943494449454946494749484949495049514952495349544955495649574958495949604961496249634964496549664967496849694970497149724973497449754976497749784979498049814982498349844985498649874988498949904991499249934994499549964997499849995000500150025003500450055006500750085009501050115012501350145015501650175018501950205021502250235024502550265027502850295030503150325033503450355036503750385039504050415042504350445045504650475048504950505051505250535054505550565057505850595060506150625063506450655066506750685069507050715072507350745075507650775078507950805081508250835084508550865087508850895090509150925093509450955096509750985099510051015102510351045105510651075108510951105111511251135114511551165117511851195120512151225123512451255126512751285129513051315132513351345135513651375138513951405141514251435144514551465147514851495150515151525153515451555156515751585159516051615162516351645165516651675168516951705171517251735174517551765177517851795180518151825183518451855186518751885189519051915192519351945195519651975198519952005201520252035204520552065207520852095210521152125213521452155216521752185219522052215222522352245225522652275228522952305231523252335234523552365237523852395240524152425243524452455246524752485249525052515252525352545255525652575258525952605261526252635264526552665267526852695270527152725273527452755276527
75278527952805281528252835284528552865287528852895290529152925293529452955296529752985299530053015302530353045305530653075308530953105311531253135314531553165317531853195320532153225323532453255326532753285329533053315332533353345335533653375338533953405341534253435344534553465347534853495350535153525353535453555356535753585359536053615362536353645365536653675368536953705371537253735374537553765377537853795380538153825383538453855386538753885389539053915392539353945395539653975398539954005401540254035404540554065407540854095410541154125413541454155416541754185419542054215422542354245425542654275428542954305431543254335434543554365437543854395440544154425443544454455446544754485449545054515452545354545455545654575458545954605461546254635464546554665467546854695470547154725473547454755476547754785479548054815482548354845485548654875488548954905491549254935494549554965497549854995500550155025503550455055506550755085509551055115512551355145515551655175518551955205521552255235524552555265527552855295530553155325533553455355536553755385539554055415542554355445545554655475548554955505551555255535554555555565557555855595560556155625563556455655566556755685569557055715572557355745575557655775578557955805581558255835584558555865587558855895590559155925593559455955596559755985599560056015602560356045605560656075608560956105611561256135614561556165617561856195620562156225623562456255626562756285629563056315632563356345635563656375638563956405641564256435644564556465647564856495650565156525653565456555656565756585659566056615662566356645665566656675668566956705671567256735674567556765677567856795680568156825683568456855686568756885689569056915692569356945695569656975698569957005701570257035704570557065707570857095710571157125713571457155716571757185719572057215722572357245725572657275728572957305731573257335734573557365737573857395740574157425743574457455746574757485749575057515752575357545755575657575758575957605761576257635764576557665767576857695770577157725773577457755776577
75778577957805781578257835784578557865787578857895790579157925793579457955796579757985799580058015802580358045805580658075808580958105811581258135814581558165817581858195820582158225823582458255826582758285829583058315832583358345835583658375838583958405841584258435844584558465847584858495850585158525853585458555856585758585859586058615862586358645865586658675868586958705871587258735874587558765877587858795880588158825883588458855886588758885889589058915892589358945895589658975898589959005901590259035904590559065907590859095910591159125913591459155916591759185919592059215922592359245925592659275928592959305931593259335934593559365937593859395940594159425943594459455946594759485949595059515952595359545955595659575958595959605961596259635964596559665967596859695970597159725973597459755976597759785979598059815982598359845985598659875988598959905991599259935994599559965997599859996000600160026003600460056006600760086009601060116012601360146015601660176018601960206021602260236024602560266027602860296030603160326033603460356036603760386039604060416042604360446045604660476048604960506051605260536054605560566057605860596060606160626063606460656066606760686069607060716072607360746075607660776078607960806081608260836084608560866087608860896090609160926093609460956096609760986099610061016102610361046105610661076108610961106111611261136114611561166117611861196120612161226123612461256126612761286129613061316132613361346135613661376138613961406141614261436144614561466147614861496150615161526153615461556156615761586159616061616162616361646165616661676168616961706171617261736174617561766177617861796180618161826183618461856186618761886189619061916192619361946195619661976198619962006201620262036204620562066207620862096210621162126213621462156216621762186219622062216222622362246225622662276228622962306231623262336234623562366237623862396240624162426243624462456246624762486249625062516252625362546255625662576258625962606261626262636264626562666267626862696270627162726273627462756276627
76278627962806281628262836284628562866287628862896290629162926293629462956296629762986299630063016302630363046305630663076308630963106311631263136314631563166317631863196320632163226323632463256326632763286329633063316332633363346335633663376338633963406341634263436344634563466347634863496350635163526353635463556356635763586359636063616362636363646365636663676368636963706371637263736374637563766377637863796380638163826383638463856386638763886389639063916392639363946395639663976398639964006401640264036404640564066407640864096410641164126413641464156416641764186419642064216422642364246425642664276428642964306431643264336434643564366437643864396440644164426443644464456446644764486449645064516452645364546455645664576458645964606461646264636464646564666467646864696470647164726473647464756476647764786479648064816482648364846485648664876488648964906491649264936494649564966497649864996500650165026503650465056506650765086509651065116512651365146515651665176518651965206521652265236524652565266527652865296530653165326533653465356536653765386539654065416542654365446545654665476548654965506551655265536554655565566557655865596560656165626563656465656566656765686569657065716572657365746575657665776578657965806581658265836584658565866587658865896590659165926593659465956596659765986599660066016602660366046605660666076608660966106611661266136614661566166617661866196620662166226623662466256626662766286629663066316632663366346635663666376638663966406641664266436644664566466647664866496650665166526653665466556656665766586659666066616662666366646665666666676668666966706671667266736674667566766677667866796680668166826683668466856686668766886689669066916692669366946695669666976698669967006701670267036704670567066707670867096710671167126713671467156716671767186719672067216722672367246725672667276728672967306731673267336734673567366737673867396740674167426743674467456746674767486749675067516752675367546755675667576758675967606761676267636764676567666767676867696770677167726773677467756776677
76778677967806781678267836784678567866787678867896790679167926793679467956796679767986799680068016802680368046805680668076808680968106811681268136814681568166817681868196820682168226823682468256826682768286829683068316832683368346835683668376838683968406841684268436844684568466847684868496850685168526853685468556856685768586859686068616862686368646865686668676868686968706871687268736874687568766877687868796880688168826883688468856886688768886889689068916892689368946895689668976898689969006901690269036904690569066907690869096910691169126913691469156916691769186919692069216922692369246925692669276928692969306931693269336934693569366937693869396940694169426943694469456946694769486949695069516952695369546955695669576958695969606961696269636964696569666967696869696970697169726973697469756976697769786979698069816982698369846985698669876988698969906991699269936994699569966997699869997000700170027003700470057006700770087009701070117012701370147015701670177018701970207021702270237024702570267027702870297030703170327033703470357036703770387039704070417042704370447045704670477048704970507051705270537054705570567057705870597060706170627063706470657066706770687069707070717072707370747075707670777078707970807081708270837084708570867087708870897090709170927093709470957096709770987099710071017102710371047105710671077108710971107111711271137114711571167117711871197120712171227123712471257126712771287129713071317132713371347135713671377138713971407141714271437144714571467147714871497150715171527153715471557156715771587159716071617162716371647165716671677168716971707171717271737174717571767177717871797180718171827183718471857186718771887189719071917192719371947195719671977198719972007201720272037204720572067207720872097210721172127213721472157216721772187219722072217222722372247225722672277228722972307231723272337234723572367237723872397240724172427243724472457246724772487249725072517252725372547255725672577258725972607261726272637264726572667267726872697270727172727273727472757276727
77278727972807281728272837284728572867287728872897290729172927293729472957296729772987299730073017302730373047305730673077308730973107311731273137314731573167317731873197320732173227323732473257326732773287329733073317332733373347335733673377338733973407341734273437344734573467347734873497350735173527353735473557356735773587359736073617362736373647365736673677368736973707371737273737374737573767377737873797380738173827383738473857386738773887389739073917392739373947395739673977398739974007401740274037404740574067407740874097410741174127413741474157416741774187419742074217422742374247425742674277428742974307431743274337434743574367437743874397440744174427443744474457446744774487449745074517452745374547455745674577458745974607461746274637464746574667467746874697470747174727473747474757476747774787479748074817482748374847485748674877488748974907491749274937494749574967497749874997500750175027503750475057506750775087509751075117512751375147515751675177518751975207521752275237524752575267527752875297530753175327533753475357536753775387539754075417542754375447545754675477548754975507551755275537554755575567557755875597560756175627563756475657566756775687569757075717572757375747575757675777578757975807581758275837584758575867587758875897590759175927593759475957596759775987599760076017602760376047605760676077608760976107611761276137614761576167617761876197620762176227623762476257626762776287629763076317632763376347635763676377638763976407641764276437644764576467647764876497650765176527653765476557656765776587659766076617662766376647665766676677668766976707671767276737674767576767677767876797680768176827683768476857686768776887689769076917692769376947695769676977698769977007701770277037704770577067707770877097710771177127713771477157716771777187719772077217722772377247725772677277728772977307731773277337734773577367737773877397740774177427743774477457746774777487749775077517752775377547755775677577758775977607761776277637764776577667767776877697770777177727773777477757776777
77778777977807781778277837784778577867787778877897790779177927793779477957796779777987799780078017802780378047805780678077808780978107811781278137814781578167817781878197820782178227823782478257826782778287829783078317832783378347835783678377838783978407841784278437844784578467847784878497850785178527853785478557856785778587859786078617862786378647865786678677868786978707871787278737874787578767877787878797880788178827883788478857886788778887889789078917892789378947895789678977898789979007901790279037904790579067907790879097910791179127913791479157916791779187919792079217922792379247925792679277928792979307931793279337934793579367937793879397940794179427943794479457946794779487949795079517952795379547955795679577958795979607961796279637964796579667967796879697970797179727973797479757976797779787979798079817982798379847985798679877988798979907991799279937994799579967997799879998000800180028003800480058006800780088009801080118012801380148015801680178018801980208021802280238024802580268027802880298030803180328033803480358036803780388039804080418042804380448045804680478048804980508051805280538054805580568057805880598060806180628063806480658066806780688069807080718072807380748075807680778078807980808081808280838084808580868087808880898090809180928093809480958096809780988099810081018102810381048105810681078108810981108111811281138114811581168117811881198120812181228123812481258126812781288129813081318132813381348135813681378138813981408141814281438144814581468147814881498150815181528153815481558156815781588159816081618162816381648165816681678168816981708171817281738174817581768177817881798180818181828183818481858186818781888189819081918192819381948195819681978198819982008201820282038204820582068207820882098210821182128213821482158216821782188219822082218222822382248225822682278228822982308231823282338234823582368237823882398240824182428243824482458246824782488249825082518252825382548255825682578258825982608261826282638264826582668267826882698270827182728273827482758276827
78278827982808281828282838284828582868287828882898290829182928293829482958296829782988299830083018302830383048305830683078308830983108311831283138314831583168317831883198320832183228323832483258326832783288329833083318332833383348335833683378338833983408341834283438344834583468347834883498350835183528353835483558356835783588359836083618362836383648365836683678368836983708371837283738374837583768377837883798380838183828383838483858386838783888389839083918392839383948395839683978398839984008401840284038404840584068407840884098410841184128413841484158416841784188419842084218422842384248425842684278428842984308431843284338434843584368437843884398440844184428443844484458446844784488449845084518452845384548455845684578458845984608461846284638464846584668467846884698470847184728473847484758476847784788479848084818482848384848485848684878488848984908491849284938494849584968497849884998500850185028503850485058506850785088509851085118512851385148515851685178518851985208521852285238524852585268527852885298530853185328533853485358536853785388539854085418542854385448545854685478548854985508551855285538554855585568557855885598560856185628563856485658566856785688569857085718572857385748575857685778578857985808581858285838584858585868587858885898590859185928593859485958596859785988599860086018602860386048605860686078608860986108611861286138614861586168617861886198620862186228623862486258626862786288629863086318632863386348635863686378638863986408641864286438644864586468647864886498650865186528653865486558656865786588659866086618662866386648665866686678668866986708671867286738674867586768677867886798680868186828683868486858686868786888689869086918692869386948695869686978698869987008701870287038704870587068707870887098710871187128713871487158716871787188719872087218722872387248725872687278728872987308731873287338734873587368737873887398740874187428743874487458746874787488749875087518752875387548755875687578758875987608761876287638764876587668767876887698770877187728773877487758776877
78778877987808781878287838784878587868787878887898790879187928793879487958796879787988799880088018802880388048805880688078808880988108811881288138814881588168817881888198820882188228823882488258826882788288829883088318832883388348835883688378838883988408841884288438844884588468847884888498850885188528853885488558856885788588859886088618862886388648865886688678868886988708871887288738874887588768877887888798880888188828883888488858886888788888889889088918892889388948895889688978898889989008901890289038904890589068907890889098910891189128913891489158916891789188919892089218922892389248925892689278928892989308931893289338934893589368937893889398940894189428943894489458946894789488949895089518952895389548955895689578958895989608961896289638964896589668967896889698970897189728973897489758976897789788979898089818982898389848985898689878988898989908991899289938994899589968997899889999000900190029003900490059006900790089009901090119012901390149015901690179018901990209021902290239024902590269027902890299030903190329033903490359036903790389039904090419042904390449045904690479048904990509051905290539054905590569057905890599060906190629063906490659066906790689069907090719072907390749075907690779078907990809081908290839084908590869087908890899090909190929093909490959096909790989099910091019102910391049105910691079108910991109111911291139114911591169117911891199120912191229123912491259126912791289129913091319132913391349135913691379138913991409141914291439144914591469147914891499150915191529153915491559156915791589159916091619162916391649165916691679168916991709171917291739174917591769177917891799180918191829183918491859186918791889189919091919192919391949195919691979198919992009201920292039204920592069207920892099210921192129213921492159216921792189219922092219222922392249225922692279228922992309231923292339234923592369237923892399240924192429243924492459246924792489249925092519252925392549255925692579258925992609261926292639264926592669267926892699270927192729273927492759276927
79278927992809281928292839284928592869287928892899290929192929293929492959296929792989299930093019302930393049305930693079308930993109311931293139314931593169317931893199320932193229323932493259326932793289329933093319332933393349335933693379338933993409341934293439344934593469347934893499350935193529353935493559356935793589359936093619362936393649365936693679368936993709371937293739374937593769377937893799380938193829383938493859386938793889389939093919392939393949395939693979398939994009401940294039404940594069407940894099410941194129413941494159416941794189419942094219422942394249425942694279428942994309431943294339434943594369437943894399440944194429443944494459446944794489449945094519452945394549455945694579458945994609461946294639464946594669467946894699470947194729473947494759476947794789479948094819482948394849485948694879488948994909491949294939494949594969497949894999500950195029503950495059506950795089509951095119512951395149515951695179518951995209521952295239524952595269527952895299530953195329533953495359536953795389539954095419542954395449545954695479548954995509551955295539554955595569557955895599560956195629563956495659566956795689569957095719572957395749575957695779578957995809581958295839584958595869587958895899590959195929593959495959596959795989599960096019602960396049605960696079608960996109611961296139614961596169617961896199620962196229623962496259626962796289629963096319632963396349635963696379638963996409641964296439644964596469647964896499650965196529653965496559656965796589659966096619662966396649665966696679668966996709671967296739674967596769677967896799680968196829683968496859686968796889689969096919692969396949695969696979698969997009701970297039704970597069707970897099710971197129713971497159716971797189719972097219722972397249725972697279728972997309731973297339734973597369737973897399740974197429743974497459746974797489749975097519752975397549755975697579758975997609761976297639764976597669767976897699770977197729773977497759776977
79778977997809781978297839784978597869787978897899790979197929793979497959796979797989799980098019802980398049805980698079808980998109811981298139814981598169817981898199820982198229823982498259826982798289829983098319832983398349835983698379838983998409841984298439844984598469847984898499850985198529853985498559856985798589859986098619862986398649865986698679868986998709871987298739874987598769877987898799880988198829883988498859886988798889889989098919892989398949895989698979898989999009901990299039904990599069907990899099910991199129913991499159916991799189919992099219922992399249925992699279928992999309931993299339934993599369937993899399940994199429943994499459946994799489949995099519952995399549955995699579958995999609961996299639964996599669967996899699970997199729973997499759976997799789979998099819982998399849985998699879988998999909991999299939994999599969997999899991000010001100021000310004100051000610007100081000910010100111001210013100141001510016100171001810019100201002110022100231002410025100261002710028100291003010031100321003310034100351003610037100381003910040100411004210043100441004510046100471004810049100501005110052100531005410055100561005710058100591006010061100621006310064100651006610067100681006910070100711007210073100741007510076100771007810079100801008110082100831008410085100861008710088100891009010091100921009310094100951009610097100981009910100101011010210103101041010510106101071010810109101101011110112101131011410115101161011710118101191012010121101221012310124101251012610127101281012910130101311013210133101341013510136101371013810139101401014110142101431014410145101461014710148101491015010151101521015310154101551015610157101581015910160101611016210163101641016510166101671016810169101701017110172101731017410175101761017710178101791018010181101821018310184101851018610187101881018910190101911019210193101941019510196101971019810199102001020110202102031020410205102061020710208102091021010211102121021310214102151021610217102181021910220102211
02221022310224102251022610227102281022910230102311023210233102341023510236102371023810239102401024110242102431024410245102461024710248102491025010251102521025310254102551025610257102581025910260102611026210263102641026510266102671026810269102701027110272102731027410275102761027710278102791028010281102821028310284102851028610287102881028910290102911029210293102941029510296102971029810299103001030110302103031030410305103061030710308103091031010311103121031310314103151031610317103181031910320103211032210323103241032510326103271032810329103301033110332103331033410335103361033710338103391034010341103421034310344103451034610347103481034910350103511035210353103541035510356103571035810359103601036110362103631036410365103661036710368103691037010371103721037310374103751037610377103781037910380103811038210383103841038510386103871038810389103901039110392103931039410395103961039710398103991040010401104021040310404104051040610407104081040910410104111041210413104141041510416104171041810419104201042110422104231042410425104261042710428104291043010431104321043310434104351043610437104381043910440104411044210443104441044510446104471044810449104501045110452104531045410455104561045710458104591046010461104621046310464104651046610467104681046910470104711047210473104741047510476104771047810479104801048110482104831048410485104861048710488104891049010491104921049310494104951049610497104981049910500105011050210503105041050510506105071050810509105101051110512105131051410515105161051710518105191052010521105221052310524105251052610527105281052910530105311053210533105341053510536105371053810539105401054110542105431054410545105461054710548105491055010551105521055310554105551055610557105581055910560105611056210563105641056510566105671056810569105701057110572105731057410575105761057710578105791058010581105821058310584105851058610587105881058910590105911059210593105941059510596105971059810599106001060110602106031060410605106061060710608106091061010611106121061310614106151061610617106181061910620106211
06221062310624106251062610627106281062910630106311063210633106341063510636106371063810639106401064110642106431064410645106461064710648106491065010651106521065310654106551065610657106581065910660106611066210663106641066510666106671066810669106701067110672106731067410675106761067710678106791068010681106821068310684106851068610687106881068910690106911069210693106941069510696106971069810699107001070110702107031070410705107061070710708107091071010711107121071310714107151071610717107181071910720107211072210723107241072510726107271072810729107301073110732107331073410735107361073710738107391074010741107421074310744107451074610747107481074910750107511075210753107541075510756107571075810759107601076110762107631076410765107661076710768107691077010771107721077310774107751077610777107781077910780107811078210783107841078510786107871078810789107901079110792107931079410795107961079710798107991080010801108021080310804108051080610807108081080910810108111081210813108141081510816108171081810819108201082110822108231082410825108261082710828108291083010831108321083310834108351083610837108381083910840108411084210843108441084510846108471084810849108501085110852108531085410855108561085710858108591086010861108621086310864108651086610867108681086910870108711087210873108741087510876108771087810879108801088110882108831088410885108861088710888108891089010891108921089310894108951089610897108981089910900109011090210903109041090510906109071090810909109101091110912109131091410915109161091710918109191092010921109221092310924109251092610927109281092910930109311093210933109341093510936109371093810939109401094110942109431094410945109461094710948109491095010951109521095310954109551095610957109581095910960109611096210963109641096510966109671096810969109701097110972109731097410975109761097710978109791098010981109821098310984109851098610987109881098910990109911099210993109941099510996109971099810999110001100111002110031100411005110061100711008110091101011011110121101311014110151101611017110181101911020110211
10221102311024110251102611027110281102911030110311103211033110341103511036110371103811039110401104111042110431104411045110461104711048110491105011051110521105311054110551105611057110581105911060110611106211063110641106511066110671106811069110701107111072110731107411075110761107711078110791108011081110821108311084110851108611087110881108911090110911109211093110941109511096110971109811099111001110111102111031110411105111061110711108111091111011111111121111311114111151111611117111181111911120111211112211123111241112511126111271112811129111301113111132111331113411135111361113711138111391114011141111421114311144111451114611147111481114911150111511115211153111541115511156111571115811159111601116111162111631116411165111661116711168111691117011171111721117311174111751117611177111781117911180111811118211183111841118511186111871118811189111901119111192111931119411195111961119711198111991120011201112021120311204112051120611207112081120911210112111121211213112141121511216112171121811219112201122111222112231122411225112261122711228112291123011231112321123311234112351123611237112381123911240112411124211243112441124511246112471124811249112501125111252112531125411255112561125711258112591126011261112621126311264112651126611267112681126911270112711127211273112741127511276112771127811279112801128111282112831128411285112861128711288112891129011291112921129311294112951129611297112981129911300113011130211303113041130511306113071130811309113101131111312113131131411315113161131711318113191132011321113221132311324113251132611327113281132911330113311133211333113341133511336113371133811339113401134111342113431134411345113461134711348113491135011351113521135311354113551135611357113581135911360113611136211363113641136511366113671136811369113701137111372113731137411375113761137711378113791138011381113821138311384113851138611387113881138911390113911139211393113941139511396113971139811399114001140111402114031140411405114061140711408114091141011411114121141311414114151141611417114181141911420114211
14221142311424114251142611427114281142911430114311143211433114341143511436114371143811439114401144111442114431144411445114461144711448114491145011451114521145311454114551145611457114581145911460114611146211463114641146511466114671146811469114701147111472114731147411475114761147711478114791148011481114821148311484114851148611487114881148911490114911149211493114941149511496114971149811499115001150111502115031150411505115061150711508115091151011511115121151311514115151151611517115181151911520115211152211523115241152511526115271152811529115301153111532115331153411535115361153711538115391154011541115421154311544115451154611547115481154911550115511155211553115541155511556115571155811559115601156111562115631156411565115661156711568115691157011571115721157311574115751157611577115781157911580115811158211583115841158511586115871158811589115901159111592115931159411595115961159711598115991160011601116021160311604116051160611607116081160911610116111161211613116141161511616116171161811619116201162111622116231162411625116261162711628116291163011631116321163311634116351163611637116381163911640116411164211643116441164511646116471164811649116501165111652116531165411655116561165711658116591166011661116621166311664116651166611667116681166911670116711167211673116741167511676116771167811679116801168111682116831168411685116861168711688116891169011691116921169311694116951169611697116981169911700117011170211703117041170511706117071170811709117101171111712117131171411715117161171711718117191172011721117221172311724117251172611727117281172911730117311173211733117341173511736117371173811739117401174111742117431174411745117461174711748117491175011751117521175311754117551175611757117581175911760117611176211763117641176511766117671176811769117701177111772117731177411775117761177711778117791178011781117821178311784117851178611787117881178911790117911179211793117941179511796117971179811799118001180111802118031180411805118061180711808118091181011811118121181311814118151181611817118181181911820118211
18221182311824118251182611827118281182911830118311183211833118341183511836118371183811839118401184111842118431184411845118461184711848118491185011851118521185311854118551185611857118581185911860118611186211863118641186511866118671186811869118701187111872118731187411875118761187711878118791188011881118821188311884118851188611887118881188911890118911189211893118941189511896118971189811899119001190111902119031190411905119061190711908119091191011911119121191311914119151191611917119181191911920119211192211923119241192511926119271192811929119301193111932119331193411935119361193711938119391194011941119421194311944119451194611947119481194911950119511195211953119541195511956119571195811959119601196111962119631196411965119661196711968119691197011971119721197311974119751197611977119781197911980119811198211983119841198511986119871198811989119901199111992119931199411995119961199711998119991200012001120021200312004120051200612007120081200912010120111201212013120141201512016120171201812019120201202112022120231202412025120261202712028120291203012031120321203312034120351203612037120381203912040120411204212043120441204512046120471204812049120501205112052120531205412055120561205712058120591206012061120621206312064120651206612067120681206912070120711207212073120741207512076120771207812079120801208112082120831208412085120861208712088120891209012091120921209312094120951209612097120981209912100121011210212103121041210512106121071210812109121101211112112121131211412115121161211712118121191212012121121221212312124121251212612127121281212912130121311213212133121341213512136121371213812139121401214112142121431214412145121461214712148121491215012151121521215312154121551215612157121581215912160121611216212163121641216512166121671216812169121701217112172121731217412175121761217712178121791218012181121821218312184121851218612187121881218912190121911219212193121941219512196121971219812199122001220112202122031220412205122061220712208122091221012211122121221312214122151221612217122181221912220122211
22221222312224122251222612227122281222912230122311223212233122341223512236122371223812239122401224112242122431224412245122461224712248122491225012251122521225312254122551225612257122581225912260122611226212263122641226512266122671226812269122701227112272122731227412275122761227712278122791228012281122821228312284122851228612287122881228912290122911229212293122941229512296122971229812299123001230112302123031230412305123061230712308123091231012311123121231312314123151231612317123181231912320123211232212323123241232512326123271232812329123301233112332123331233412335123361233712338123391234012341123421234312344123451234612347123481234912350123511235212353123541235512356123571235812359123601236112362123631236412365123661236712368123691237012371123721237312374123751237612377123781237912380123811238212383123841238512386123871238812389123901239112392123931239412395123961239712398123991240012401124021240312404124051240612407124081240912410124111241212413124141241512416124171241812419124201242112422124231242412425124261242712428124291243012431124321243312434124351243612437124381243912440124411244212443124441244512446124471244812449124501245112452124531245412455124561245712458124591246012461124621246312464124651246612467124681246912470124711247212473124741247512476124771247812479124801248112482124831248412485124861248712488124891249012491124921249312494124951249612497124981249912500125011250212503125041250512506125071250812509125101251112512125131251412515125161251712518125191252012521125221252312524125251252612527125281252912530125311253212533125341253512536125371253812539125401254112542125431254412545125461254712548125491255012551125521255312554125551255612557125581255912560125611256212563125641256512566125671256812569125701257112572125731257412575125761257712578125791258012581125821258312584125851258612587125881258912590125911259212593125941259512596125971259812599126001260112602126031260412605126061260712608126091261012611126121261312614126151261612617126181261912620126211
26221262312624126251262612627126281262912630126311263212633126341263512636126371263812639126401264112642126431264412645126461264712648126491265012651126521265312654126551265612657126581265912660126611266212663126641266512666126671266812669126701267112672126731267412675126761267712678126791268012681126821268312684126851268612687126881268912690126911269212693126941269512696126971269812699127001270112702127031270412705127061270712708127091271012711127121271312714127151271612717127181271912720127211272212723127241272512726127271272812729127301273112732127331273412735127361273712738127391274012741127421274312744127451274612747127481274912750127511275212753127541275512756127571275812759127601276112762127631276412765127661276712768127691277012771127721277312774127751277612777127781277912780127811278212783127841278512786127871278812789127901279112792127931279412795127961279712798127991280012801128021280312804128051280612807128081280912810128111281212813128141281512816128171281812819128201282112822128231282412825128261282712828128291283012831128321283312834128351283612837128381283912840128411284212843128441284512846128471284812849128501285112852128531285412855128561285712858128591286012861128621286312864128651286612867128681286912870128711287212873128741287512876128771287812879128801288112882128831288412885128861288712888128891289012891128921289312894128951289612897128981289912900129011290212903129041290512906129071290812909129101291112912129131291412915129161291712918129191292012921129221292312924129251292612927129281292912930129311293212933129341293512936129371293812939129401294112942129431294412945129461294712948129491295012951129521295312954129551295612957129581295912960129611296212963129641296512966129671296812969129701297112972129731297412975129761297712978129791298012981129821298312984129851298612987129881298912990129911299212993129941299512996129971299812999130001300113002130031300413005130061300713008130091301013011130121301313014130151301613017130181301913020130211
30221302313024130251302613027130281302913030130311303213033130341303513036130371303813039130401304113042130431304413045130461304713048130491305013051130521305313054130551305613057130581305913060130611306213063130641306513066130671306813069130701307113072130731307413075130761307713078130791308013081130821308313084130851308613087130881308913090130911309213093130941309513096130971309813099131001310113102131031310413105131061310713108131091311013111131121311313114131151311613117131181311913120131211312213123131241312513126131271312813129131301313113132131331313413135131361313713138131391314013141131421314313144131451314613147131481314913150131511315213153131541315513156131571315813159131601316113162131631316413165131661316713168131691317013171131721317313174131751317613177131781317913180131811318213183131841318513186131871318813189131901319113192131931319413195131961319713198131991320013201132021320313204132051320613207132081320913210132111321213213132141321513216132171321813219132201322113222132231322413225132261322713228132291323013231132321323313234132351323613237132381323913240132411324213243132441324513246132471324813249132501325113252132531325413255132561325713258132591326013261132621326313264132651326613267132681326913270132711327213273132741327513276132771327813279132801328113282132831328413285132861328713288132891329013291132921329313294132951329613297132981329913300133011330213303133041330513306133071330813309133101331113312133131331413315133161331713318133191332013321133221332313324133251332613327133281332913330133311333213333133341333513336133371333813339133401334113342133431334413345133461334713348133491335013351133521335313354133551335613357133581335913360133611336213363133641336513366133671336813369133701337113372133731337413375133761337713378133791338013381133821338313384133851338613387133881338913390133911339213393133941339513396133971339813399134001340113402134031340413405134061340713408134091341013411134121341313414134151341613417134181341913420134211
34221342313424134251342613427134281342913430134311343213433134341343513436134371343813439134401344113442134431344413445134461344713448134491345013451134521345313454134551345613457134581345913460134611346213463134641346513466134671346813469134701347113472134731347413475134761347713478134791348013481134821348313484134851348613487134881348913490134911349213493134941349513496134971349813499135001350113502135031350413505135061350713508135091351013511135121351313514135151351613517135181351913520135211352213523135241352513526135271352813529135301353113532135331353413535135361353713538135391354013541135421354313544135451354613547135481354913550135511355213553135541355513556135571355813559135601356113562135631356413565135661356713568135691357013571135721357313574135751357613577135781357913580135811358213583135841358513586135871358813589135901359113592135931359413595135961359713598135991360013601136021360313604136051360613607136081360913610136111361213613136141361513616136171361813619136201362113622136231362413625136261362713628136291363013631136321363313634136351363613637136381363913640136411364213643136441364513646136471364813649136501365113652136531365413655136561365713658136591366013661136621366313664136651366613667136681366913670136711367213673136741367513676136771367813679136801368113682136831368413685136861368713688136891369013691136921369313694136951369613697136981369913700137011370213703137041370513706137071370813709137101371113712137131371413715137161371713718137191372013721137221372313724137251372613727137281372913730137311373213733137341373513736137371373813739137401374113742137431374413745137461374713748137491375013751137521375313754137551375613757137581375913760137611376213763137641376513766137671376813769137701377113772137731377413775137761377713778137791378013781137821378313784137851378613787137881378913790137911379213793137941379513796137971379813799138001380113802138031380413805138061380713808138091381013811138121381313814138151381613817138181381913820138211
38221382313824138251382613827138281382913830138311383213833138341383513836138371383813839138401384113842138431384413845138461384713848138491385013851138521385313854138551385613857138581385913860138611386213863138641386513866138671386813869138701387113872138731387413875138761387713878138791388013881138821388313884138851388613887138881388913890138911389213893138941389513896138971389813899139001390113902139031390413905139061390713908139091391013911139121391313914139151391613917139181391913920139211392213923139241392513926139271392813929139301393113932139331393413935139361393713938139391394013941139421394313944139451394613947139481394913950139511395213953139541395513956139571395813959139601396113962139631396413965139661396713968139691397013971139721397313974139751397613977139781397913980139811398213983139841398513986139871398813989139901399113992139931399413995139961399713998139991400014001140021400314004140051400614007140081400914010140111401214013140141401514016140171401814019140201402114022140231402414025140261402714028140291403014031140321403314034140351403614037140381403914040140411404214043140441404514046140471404814049140501405114052140531405414055140561405714058140591406014061140621406314064140651406614067140681406914070140711407214073140741407514076140771407814079140801408114082140831408414085140861408714088140891409014091140921409314094140951409614097140981409914100141011410214103141041410514106141071410814109141101411114112141131411414115141161411714118141191412014121141221412314124141251412614127141281412914130141311413214133141341413514136141371413814139141401414114142141431414414145141461414714148141491415014151141521415314154141551415614157141581415914160141611416214163141641416514166141671416814169141701417114172141731417414175141761417714178141791418014181141821418314184141851418614187141881418914190141911419214193141941419514196141971419814199142001420114202142031420414205142061420714208142091421014211142121421314214142151421614217142181421914220142211
42221422314224142251422614227142281422914230142311423214233142341423514236142371423814239142401424114242142431424414245142461424714248142491425014251142521425314254142551425614257142581425914260142611426214263142641426514266142671426814269142701427114272142731427414275142761427714278142791428014281142821428314284142851428614287142881428914290142911429214293142941429514296142971429814299143001430114302143031430414305143061430714308143091431014311143121431314314143151431614317143181431914320143211432214323143241432514326143271432814329143301433114332143331433414335143361433714338143391434014341143421434314344143451434614347143481434914350143511435214353143541435514356143571435814359143601436114362143631436414365143661436714368143691437014371143721437314374143751437614377143781437914380143811438214383143841438514386143871438814389143901439114392143931439414395143961439714398143991440014401144021440314404144051440614407144081440914410144111441214413144141441514416144171441814419144201442114422144231442414425144261442714428144291443014431144321443314434144351443614437144381443914440144411444214443144441444514446144471444814449144501445114452144531445414455144561445714458144591446014461144621446314464144651446614467144681446914470144711447214473144741447514476144771447814479144801448114482144831448414485144861448714488144891449014491144921449314494144951449614497144981449914500145011450214503145041450514506145071450814509145101451114512145131451414515145161451714518145191452014521145221452314524145251452614527145281452914530145311453214533145341453514536145371453814539145401454114542145431454414545145461454714548145491455014551145521455314554145551455614557145581455914560145611456214563145641456514566145671456814569145701457114572145731457414575145761457714578145791458014581145821458314584145851458614587145881458914590145911459214593145941459514596145971459814599146001460114602146031460414605146061460714608146091461014611146121461314614146151461614617146181461914620146211
46221462314624146251462614627146281462914630146311463214633146341463514636146371463814639146401464114642146431464414645146461464714648146491465014651146521465314654146551465614657146581465914660146611466214663146641466514666146671466814669146701467114672146731467414675146761467714678146791468014681146821468314684146851468614687146881468914690146911469214693146941469514696146971469814699147001470114702147031470414705147061470714708147091471014711147121471314714147151471614717147181471914720147211472214723147241472514726147271472814729147301473114732147331473414735147361473714738147391474014741147421474314744147451474614747147481474914750147511475214753147541475514756147571475814759147601476114762147631476414765147661476714768147691477014771147721477314774147751477614777147781477914780147811478214783147841478514786147871478814789147901479114792147931479414795147961479714798147991480014801148021480314804148051480614807148081480914810148111481214813148141481514816148171481814819148201482114822148231482414825148261482714828148291483014831148321483314834148351483614837148381483914840148411484214843148441484514846148471484814849148501485114852148531485414855148561485714858148591486014861148621486314864148651486614867148681486914870148711487214873148741487514876148771487814879148801488114882148831488414885148861488714888148891489014891148921489314894148951489614897148981489914900149011490214903149041490514906149071490814909149101491114912149131491414915149161491714918149191492014921149221492314924149251492614927149281492914930149311493214933149341493514936149371493814939149401494114942149431494414945149461494714948149491495014951149521495314954149551495614957149581495914960149611496214963149641496514966149671496814969149701497114972149731497414975149761497714978149791498014981149821498314984149851498614987149881498914990149911499214993149941499514996149971499814999150001500115002150031500415005150061500715008150091501015011150121501315014150151501615017150181501915020150211
50221502315024150251502615027150281502915030150311503215033150341503515036150371503815039150401504115042150431504415045150461504715048150491505015051150521505315054150551505615057150581505915060150611506215063150641506515066150671506815069150701507115072150731507415075150761507715078150791508015081150821508315084150851508615087150881508915090150911509215093150941509515096150971509815099151001510115102151031510415105151061510715108151091511015111151121511315114151151511615117151181511915120151211512215123151241512515126151271512815129151301513115132151331513415135151361513715138151391514015141151421514315144151451514615147151481514915150151511515215153151541515515156151571515815159151601516115162151631516415165151661516715168151691517015171151721517315174151751517615177151781517915180151811518215183151841518515186151871518815189151901519115192151931519415195151961519715198151991520015201152021520315204152051520615207152081520915210152111521215213152141521515216152171521815219152201522115222152231522415225152261522715228152291523015231152321523315234152351523615237152381523915240152411524215243152441524515246152471524815249152501525115252152531525415255152561525715258152591526015261152621526315264152651526615267152681526915270152711527215273152741527515276152771527815279152801528115282152831528415285152861528715288152891529015291152921529315294152951529615297152981529915300153011530215303153041530515306153071530815309153101531115312153131531415315153161531715318153191532015321153221532315324153251532615327153281532915330153311533215333153341533515336153371533815339153401534115342153431534415345153461534715348153491535015351153521535315354153551535615357153581535915360153611536215363153641536515366153671536815369153701537115372153731537415375153761537715378153791538015381153821538315384153851538615387153881538915390153911539215393153941539515396153971539815399154001540115402154031540415405154061540715408154091541015411154121541315414154151541615417154181541915420154211
542215423
  1. ; /* aes_gcm_asm
  2. ; *
  3. ; * Copyright (C) 2006-2023 wolfSSL Inc.
  4. ; *
  5. ; * This file is part of wolfSSL.
  6. ; *
  7. ; * wolfSSL is free software; you can redistribute it and/or modify
  8. ; * it under the terms of the GNU General Public License as published by
  9. ; * the Free Software Foundation; either version 2 of the License, or
  10. ; * (at your option) any later version.
  11. ; *
  12. ; * wolfSSL is distributed in the hope that it will be useful,
  13. ; * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. ; * GNU General Public License for more details.
  16. ; *
  17. ; * You should have received a copy of the GNU General Public License
  18. ; * along with this program; if not, write to the Free Software
  19. ; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
  20. ; */
  ; Assembly-time feature switches.
  ; Each symbol below is given a default only when it is not already defined;
  ; the one exception is HAVE_INTEL_AVX2, which is assigned without checking
  ; whether it already exists.
  ; NOTE(review): the code that tests these symbols is outside this section --
  ; confirm how the AVX1/AVX2/MOVBE paths consume them.
  21. IF @Version LT 1200
  ; Assembler reports a version below 1200: set both opt-out symbols.
  22. ; AVX2 instructions not recognized by old versions of MASM
  23. IFNDEF NO_AVX2_SUPPORT
  24. NO_AVX2_SUPPORT = 1
  25. ENDIF
  26. ; MOVBE instruction not recognized by old versions of MASM
  27. IFNDEF NO_MOVBE_SUPPORT
  28. NO_MOVBE_SUPPORT = 1
  29. ENDIF
  30. ENDIF
  ; Default HAVE_INTEL_AVX1 to 1 unless the symbol is already defined.
  31. IFNDEF HAVE_INTEL_AVX1
  32. HAVE_INTEL_AVX1 = 1
  33. ENDIF
  ; HAVE_INTEL_AVX2 is set only when NO_AVX2_SUPPORT is undefined, i.e. neither
  ; the version check above nor an earlier definition has opted out of AVX2.
  34. IFNDEF NO_AVX2_SUPPORT
  35. HAVE_INTEL_AVX2 = 1
  36. ENDIF
  ; Default _WIN64 to 1 unless the symbol is already defined.
  37. IFNDEF _WIN64
  38. _WIN64 = 1
  39. ENDIF
  ; 128-bit constants for the AES-GCM routines.
  ; Layout is the same for every entry: ALIGN 16, then two QWORDs (16 bytes,
  ; first QWORD is the low half), then a ptr_<name> QWORD holding the address
  ; of the constant.
  ; NOTE(review): the ptr_ symbols are not referenced in the visible part of
  ; AES_GCM_encrypt, which addresses the constants directly -- confirm which
  ; code paths use them.
  ;
  ; Counter increments 1..8: low QWORD 0, high QWORD N. They are added with
  ; paddd to the counter block after it has been shuffled with
  ; L_aes_gcm_bswap_epi64.
  40. _DATA SEGMENT
  41. ALIGN 16
  42. L_aes_gcm_one QWORD 0, 1
  43. ptr_L_aes_gcm_one QWORD L_aes_gcm_one
  44. _DATA ENDS
  45. _DATA SEGMENT
  46. ALIGN 16
  47. L_aes_gcm_two QWORD 0, 2
  48. ptr_L_aes_gcm_two QWORD L_aes_gcm_two
  49. _DATA ENDS
  50. _DATA SEGMENT
  51. ALIGN 16
  52. L_aes_gcm_three QWORD 0, 3
  53. ptr_L_aes_gcm_three QWORD L_aes_gcm_three
  54. _DATA ENDS
  55. _DATA SEGMENT
  56. ALIGN 16
  57. L_aes_gcm_four QWORD 0, 4
  58. ptr_L_aes_gcm_four QWORD L_aes_gcm_four
  59. _DATA ENDS
  60. _DATA SEGMENT
  61. ALIGN 16
  62. L_aes_gcm_five QWORD 0, 5
  63. ptr_L_aes_gcm_five QWORD L_aes_gcm_five
  64. _DATA ENDS
  65. _DATA SEGMENT
  66. ALIGN 16
  67. L_aes_gcm_six QWORD 0, 6
  68. ptr_L_aes_gcm_six QWORD L_aes_gcm_six
  69. _DATA ENDS
  70. _DATA SEGMENT
  71. ALIGN 16
  72. L_aes_gcm_seven QWORD 0, 7
  73. ptr_L_aes_gcm_seven QWORD L_aes_gcm_seven
  74. _DATA ENDS
  75. _DATA SEGMENT
  76. ALIGN 16
  77. L_aes_gcm_eight QWORD 0, 8
  78. ptr_L_aes_gcm_eight QWORD L_aes_gcm_eight
  79. _DATA ENDS
  ; pshufb control mask: 0001020304050607h, 08090A0B0C0D0E0Fh.
  ; Reverses the byte order inside each 64-bit half separately; applied to the
  ; counter block before the paddd increments and again before encryption.
  80. _DATA SEGMENT
  81. ALIGN 16
  82. L_aes_gcm_bswap_epi64 QWORD 283686952306183, 579005069656919567
  83. ptr_L_aes_gcm_bswap_epi64 QWORD L_aes_gcm_bswap_epi64
  84. _DATA ENDS
  ; pshufb control mask: 08090A0B0C0D0E0Fh, 0001020304050607h.
  ; Reverses all 16 bytes of a register; applied to H and to the IV, AAD and
  ; ciphertext blocks that are folded into the hash.
  85. _DATA SEGMENT
  86. ALIGN 16
  87. L_aes_gcm_bswap_mask QWORD 579005069656919567, 283686952306183
  88. ptr_L_aes_gcm_bswap_mask QWORD L_aes_gcm_bswap_mask
  89. _DATA ENDS
  ; Low QWORD 1, high QWORD 0C200000000000000h.
  ; ANDed with a sign-derived mask and XORed into H shifted left by one in the
  ; "Calculate counter and H" step.
  ; NOTE(review): presumably the GHASH reduction-polynomial constant -- confirm.
  90. _DATA SEGMENT
  91. ALIGN 16
  92. L_aes_gcm_mod2_128 QWORD 1, 13979173243358019584
  93. ptr_L_aes_gcm_mod2_128 QWORD L_aes_gcm_mod2_128
  94. _DATA ENDS
  95. _text SEGMENT READONLY PARA
  96. AES_GCM_encrypt PROC
  97. push r13
  98. push rdi
  99. push rsi
  100. push r12
  101. push rbx
  102. push r14
  103. push r15
  104. mov rdi, rcx
  105. mov rsi, rdx
  106. mov r12, r8
  107. mov rax, r9
  108. mov r8, QWORD PTR [rsp+96]
  109. mov r9d, DWORD PTR [rsp+104]
  110. mov r11d, DWORD PTR [rsp+112]
  111. mov ebx, DWORD PTR [rsp+120]
  112. mov r14d, DWORD PTR [rsp+128]
  113. mov r15, QWORD PTR [rsp+136]
  114. mov r10d, DWORD PTR [rsp+144]
  115. sub rsp, 160
  116. pxor xmm4, xmm4
  117. pxor xmm6, xmm6
  118. cmp ebx, 12
  119. mov edx, ebx
  120. jne L_AES_GCM_encrypt_iv_not_12
  121. ; # Calculate values when IV is 12 bytes
  122. ; Set counter based on IV
  123. mov ecx, 16777216
  124. pinsrq xmm4, QWORD PTR [rax], 0
  125. pinsrd xmm4, DWORD PTR [rax+8], 2
  126. pinsrd xmm4, ecx, 3
  127. ; H = Encrypt X(=0) and T = Encrypt counter
  128. movdqa xmm1, xmm4
  129. movdqa xmm5, OWORD PTR [r15]
  130. pxor xmm1, xmm5
  131. movdqa xmm7, OWORD PTR [r15+16]
  132. aesenc xmm5, xmm7
  133. aesenc xmm1, xmm7
  134. movdqa xmm7, OWORD PTR [r15+32]
  135. aesenc xmm5, xmm7
  136. aesenc xmm1, xmm7
  137. movdqa xmm7, OWORD PTR [r15+48]
  138. aesenc xmm5, xmm7
  139. aesenc xmm1, xmm7
  140. movdqa xmm7, OWORD PTR [r15+64]
  141. aesenc xmm5, xmm7
  142. aesenc xmm1, xmm7
  143. movdqa xmm7, OWORD PTR [r15+80]
  144. aesenc xmm5, xmm7
  145. aesenc xmm1, xmm7
  146. movdqa xmm7, OWORD PTR [r15+96]
  147. aesenc xmm5, xmm7
  148. aesenc xmm1, xmm7
  149. movdqa xmm7, OWORD PTR [r15+112]
  150. aesenc xmm5, xmm7
  151. aesenc xmm1, xmm7
  152. movdqa xmm7, OWORD PTR [r15+128]
  153. aesenc xmm5, xmm7
  154. aesenc xmm1, xmm7
  155. movdqa xmm7, OWORD PTR [r15+144]
  156. aesenc xmm5, xmm7
  157. aesenc xmm1, xmm7
  158. cmp r10d, 11
  159. movdqa xmm7, OWORD PTR [r15+160]
  160. jl L_AES_GCM_encrypt_calc_iv_12_last
  161. aesenc xmm5, xmm7
  162. aesenc xmm1, xmm7
  163. movdqa xmm7, OWORD PTR [r15+176]
  164. aesenc xmm5, xmm7
  165. aesenc xmm1, xmm7
  166. cmp r10d, 13
  167. movdqa xmm7, OWORD PTR [r15+192]
  168. jl L_AES_GCM_encrypt_calc_iv_12_last
  169. aesenc xmm5, xmm7
  170. aesenc xmm1, xmm7
  171. movdqa xmm7, OWORD PTR [r15+208]
  172. aesenc xmm5, xmm7
  173. aesenc xmm1, xmm7
  174. movdqa xmm7, OWORD PTR [r15+224]
  175. L_AES_GCM_encrypt_calc_iv_12_last:
  176. aesenclast xmm5, xmm7
  177. aesenclast xmm1, xmm7
  178. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  179. movdqu [rsp+144], xmm1
  180. jmp L_AES_GCM_encrypt_iv_done
  181. L_AES_GCM_encrypt_iv_not_12:
  182. ; Calculate values when IV is not 12 bytes
  183. ; H = Encrypt X(=0)
  184. movdqa xmm5, OWORD PTR [r15]
  185. aesenc xmm5, [r15+16]
  186. aesenc xmm5, [r15+32]
  187. aesenc xmm5, [r15+48]
  188. aesenc xmm5, [r15+64]
  189. aesenc xmm5, [r15+80]
  190. aesenc xmm5, [r15+96]
  191. aesenc xmm5, [r15+112]
  192. aesenc xmm5, [r15+128]
  193. aesenc xmm5, [r15+144]
  194. cmp r10d, 11
  195. movdqa xmm9, OWORD PTR [r15+160]
  196. jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last
  197. aesenc xmm5, xmm9
  198. aesenc xmm5, [r15+176]
  199. cmp r10d, 13
  200. movdqa xmm9, OWORD PTR [r15+192]
  201. jl L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last
  202. aesenc xmm5, xmm9
  203. aesenc xmm5, [r15+208]
  204. movdqa xmm9, OWORD PTR [r15+224]
  205. L_AES_GCM_encrypt_calc_iv_1_aesenc_avx_last:
  206. aesenclast xmm5, xmm9
  207. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  208. ; Calc counter
  209. ; Initialization vector
  210. cmp edx, 0
  211. mov rcx, 0
  212. je L_AES_GCM_encrypt_calc_iv_done
  213. cmp edx, 16
  214. jl L_AES_GCM_encrypt_calc_iv_lt16
  215. and edx, 4294967280
  216. L_AES_GCM_encrypt_calc_iv_16_loop:
  217. movdqu xmm8, [rax+rcx]
  218. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  219. pxor xmm4, xmm8
  220. pshufd xmm1, xmm4, 78
  221. pshufd xmm2, xmm5, 78
  222. movdqa xmm3, xmm5
  223. movdqa xmm0, xmm5
  224. pclmulqdq xmm3, xmm4, 17
  225. pclmulqdq xmm0, xmm4, 0
  226. pxor xmm1, xmm4
  227. pxor xmm2, xmm5
  228. pclmulqdq xmm1, xmm2, 0
  229. pxor xmm1, xmm0
  230. pxor xmm1, xmm3
  231. movdqa xmm2, xmm1
  232. movdqa xmm7, xmm0
  233. movdqa xmm4, xmm3
  234. pslldq xmm2, 8
  235. psrldq xmm1, 8
  236. pxor xmm7, xmm2
  237. pxor xmm4, xmm1
  238. movdqa xmm0, xmm7
  239. movdqa xmm1, xmm4
  240. psrld xmm0, 31
  241. psrld xmm1, 31
  242. pslld xmm7, 1
  243. pslld xmm4, 1
  244. movdqa xmm2, xmm0
  245. pslldq xmm0, 4
  246. psrldq xmm2, 12
  247. pslldq xmm1, 4
  248. por xmm4, xmm2
  249. por xmm7, xmm0
  250. por xmm4, xmm1
  251. movdqa xmm0, xmm7
  252. movdqa xmm1, xmm7
  253. movdqa xmm2, xmm7
  254. pslld xmm0, 31
  255. pslld xmm1, 30
  256. pslld xmm2, 25
  257. pxor xmm0, xmm1
  258. pxor xmm0, xmm2
  259. movdqa xmm1, xmm0
  260. psrldq xmm1, 4
  261. pslldq xmm0, 12
  262. pxor xmm7, xmm0
  263. movdqa xmm2, xmm7
  264. movdqa xmm3, xmm7
  265. movdqa xmm0, xmm7
  266. psrld xmm2, 1
  267. psrld xmm3, 2
  268. psrld xmm0, 7
  269. pxor xmm2, xmm3
  270. pxor xmm2, xmm0
  271. pxor xmm2, xmm1
  272. pxor xmm2, xmm7
  273. pxor xmm4, xmm2
  274. add ecx, 16
  275. cmp ecx, edx
  276. jl L_AES_GCM_encrypt_calc_iv_16_loop
  277. mov edx, ebx
  278. cmp ecx, edx
  279. je L_AES_GCM_encrypt_calc_iv_done
  280. L_AES_GCM_encrypt_calc_iv_lt16:
  281. sub rsp, 16
  282. pxor xmm8, xmm8
  283. xor ebx, ebx
  284. movdqu [rsp], xmm8
  285. L_AES_GCM_encrypt_calc_iv_loop:
  286. movzx r13d, BYTE PTR [rax+rcx]
  287. mov BYTE PTR [rsp+rbx], r13b
  288. inc ecx
  289. inc ebx
  290. cmp ecx, edx
  291. jl L_AES_GCM_encrypt_calc_iv_loop
  292. movdqu xmm8, [rsp]
  293. add rsp, 16
  294. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  295. pxor xmm4, xmm8
  296. pshufd xmm1, xmm4, 78
  297. pshufd xmm2, xmm5, 78
  298. movdqa xmm3, xmm5
  299. movdqa xmm0, xmm5
  300. pclmulqdq xmm3, xmm4, 17
  301. pclmulqdq xmm0, xmm4, 0
  302. pxor xmm1, xmm4
  303. pxor xmm2, xmm5
  304. pclmulqdq xmm1, xmm2, 0
  305. pxor xmm1, xmm0
  306. pxor xmm1, xmm3
  307. movdqa xmm2, xmm1
  308. movdqa xmm7, xmm0
  309. movdqa xmm4, xmm3
  310. pslldq xmm2, 8
  311. psrldq xmm1, 8
  312. pxor xmm7, xmm2
  313. pxor xmm4, xmm1
  314. movdqa xmm0, xmm7
  315. movdqa xmm1, xmm4
  316. psrld xmm0, 31
  317. psrld xmm1, 31
  318. pslld xmm7, 1
  319. pslld xmm4, 1
  320. movdqa xmm2, xmm0
  321. pslldq xmm0, 4
  322. psrldq xmm2, 12
  323. pslldq xmm1, 4
  324. por xmm4, xmm2
  325. por xmm7, xmm0
  326. por xmm4, xmm1
  327. movdqa xmm0, xmm7
  328. movdqa xmm1, xmm7
  329. movdqa xmm2, xmm7
  330. pslld xmm0, 31
  331. pslld xmm1, 30
  332. pslld xmm2, 25
  333. pxor xmm0, xmm1
  334. pxor xmm0, xmm2
  335. movdqa xmm1, xmm0
  336. psrldq xmm1, 4
  337. pslldq xmm0, 12
  338. pxor xmm7, xmm0
  339. movdqa xmm2, xmm7
  340. movdqa xmm3, xmm7
  341. movdqa xmm0, xmm7
  342. psrld xmm2, 1
  343. psrld xmm3, 2
  344. psrld xmm0, 7
  345. pxor xmm2, xmm3
  346. pxor xmm2, xmm0
  347. pxor xmm2, xmm1
  348. pxor xmm2, xmm7
  349. pxor xmm4, xmm2
  350. L_AES_GCM_encrypt_calc_iv_done:
  351. ; T = Encrypt counter
  352. pxor xmm0, xmm0
  353. shl edx, 3
  354. pinsrq xmm0, rdx, 0
  355. pxor xmm4, xmm0
  356. pshufd xmm1, xmm4, 78
  357. pshufd xmm2, xmm5, 78
  358. movdqa xmm3, xmm5
  359. movdqa xmm0, xmm5
  360. pclmulqdq xmm3, xmm4, 17
  361. pclmulqdq xmm0, xmm4, 0
  362. pxor xmm1, xmm4
  363. pxor xmm2, xmm5
  364. pclmulqdq xmm1, xmm2, 0
  365. pxor xmm1, xmm0
  366. pxor xmm1, xmm3
  367. movdqa xmm2, xmm1
  368. movdqa xmm7, xmm0
  369. movdqa xmm4, xmm3
  370. pslldq xmm2, 8
  371. psrldq xmm1, 8
  372. pxor xmm7, xmm2
  373. pxor xmm4, xmm1
  374. movdqa xmm0, xmm7
  375. movdqa xmm1, xmm4
  376. psrld xmm0, 31
  377. psrld xmm1, 31
  378. pslld xmm7, 1
  379. pslld xmm4, 1
  380. movdqa xmm2, xmm0
  381. pslldq xmm0, 4
  382. psrldq xmm2, 12
  383. pslldq xmm1, 4
  384. por xmm4, xmm2
  385. por xmm7, xmm0
  386. por xmm4, xmm1
  387. movdqa xmm0, xmm7
  388. movdqa xmm1, xmm7
  389. movdqa xmm2, xmm7
  390. pslld xmm0, 31
  391. pslld xmm1, 30
  392. pslld xmm2, 25
  393. pxor xmm0, xmm1
  394. pxor xmm0, xmm2
  395. movdqa xmm1, xmm0
  396. psrldq xmm1, 4
  397. pslldq xmm0, 12
  398. pxor xmm7, xmm0
  399. movdqa xmm2, xmm7
  400. movdqa xmm3, xmm7
  401. movdqa xmm0, xmm7
  402. psrld xmm2, 1
  403. psrld xmm3, 2
  404. psrld xmm0, 7
  405. pxor xmm2, xmm3
  406. pxor xmm2, xmm0
  407. pxor xmm2, xmm1
  408. pxor xmm2, xmm7
  409. pxor xmm4, xmm2
  410. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  411. ; Encrypt counter
  412. movdqa xmm8, OWORD PTR [r15]
  413. pxor xmm8, xmm4
  414. aesenc xmm8, [r15+16]
  415. aesenc xmm8, [r15+32]
  416. aesenc xmm8, [r15+48]
  417. aesenc xmm8, [r15+64]
  418. aesenc xmm8, [r15+80]
  419. aesenc xmm8, [r15+96]
  420. aesenc xmm8, [r15+112]
  421. aesenc xmm8, [r15+128]
  422. aesenc xmm8, [r15+144]
  423. cmp r10d, 11
  424. movdqa xmm9, OWORD PTR [r15+160]
  425. jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last
  426. aesenc xmm8, xmm9
  427. aesenc xmm8, [r15+176]
  428. cmp r10d, 13
  429. movdqa xmm9, OWORD PTR [r15+192]
  430. jl L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last
  431. aesenc xmm8, xmm9
  432. aesenc xmm8, [r15+208]
  433. movdqa xmm9, OWORD PTR [r15+224]
  434. L_AES_GCM_encrypt_calc_iv_2_aesenc_avx_last:
  435. aesenclast xmm8, xmm9
  436. movdqu [rsp+144], xmm8
  437. L_AES_GCM_encrypt_iv_done:
  438. ; Additional authentication data
  439. mov edx, r11d
  440. cmp edx, 0
  441. je L_AES_GCM_encrypt_calc_aad_done
  442. xor ecx, ecx
  443. cmp edx, 16
  444. jl L_AES_GCM_encrypt_calc_aad_lt16
  445. and edx, 4294967280
  446. L_AES_GCM_encrypt_calc_aad_16_loop:
  447. movdqu xmm8, [r12+rcx]
  448. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  449. pxor xmm6, xmm8
  450. pshufd xmm1, xmm6, 78
  451. pshufd xmm2, xmm5, 78
  452. movdqa xmm3, xmm5
  453. movdqa xmm0, xmm5
  454. pclmulqdq xmm3, xmm6, 17
  455. pclmulqdq xmm0, xmm6, 0
  456. pxor xmm1, xmm6
  457. pxor xmm2, xmm5
  458. pclmulqdq xmm1, xmm2, 0
  459. pxor xmm1, xmm0
  460. pxor xmm1, xmm3
  461. movdqa xmm2, xmm1
  462. movdqa xmm7, xmm0
  463. movdqa xmm6, xmm3
  464. pslldq xmm2, 8
  465. psrldq xmm1, 8
  466. pxor xmm7, xmm2
  467. pxor xmm6, xmm1
  468. movdqa xmm0, xmm7
  469. movdqa xmm1, xmm6
  470. psrld xmm0, 31
  471. psrld xmm1, 31
  472. pslld xmm7, 1
  473. pslld xmm6, 1
  474. movdqa xmm2, xmm0
  475. pslldq xmm0, 4
  476. psrldq xmm2, 12
  477. pslldq xmm1, 4
  478. por xmm6, xmm2
  479. por xmm7, xmm0
  480. por xmm6, xmm1
  481. movdqa xmm0, xmm7
  482. movdqa xmm1, xmm7
  483. movdqa xmm2, xmm7
  484. pslld xmm0, 31
  485. pslld xmm1, 30
  486. pslld xmm2, 25
  487. pxor xmm0, xmm1
  488. pxor xmm0, xmm2
  489. movdqa xmm1, xmm0
  490. psrldq xmm1, 4
  491. pslldq xmm0, 12
  492. pxor xmm7, xmm0
  493. movdqa xmm2, xmm7
  494. movdqa xmm3, xmm7
  495. movdqa xmm0, xmm7
  496. psrld xmm2, 1
  497. psrld xmm3, 2
  498. psrld xmm0, 7
  499. pxor xmm2, xmm3
  500. pxor xmm2, xmm0
  501. pxor xmm2, xmm1
  502. pxor xmm2, xmm7
  503. pxor xmm6, xmm2
  504. add ecx, 16
  505. cmp ecx, edx
  506. jl L_AES_GCM_encrypt_calc_aad_16_loop
  507. mov edx, r11d
  508. cmp ecx, edx
  509. je L_AES_GCM_encrypt_calc_aad_done
  510. L_AES_GCM_encrypt_calc_aad_lt16:
  511. sub rsp, 16
  512. pxor xmm8, xmm8
  513. xor ebx, ebx
  514. movdqu [rsp], xmm8
  515. L_AES_GCM_encrypt_calc_aad_loop:
  516. movzx r13d, BYTE PTR [r12+rcx]
  517. mov BYTE PTR [rsp+rbx], r13b
  518. inc ecx
  519. inc ebx
  520. cmp ecx, edx
  521. jl L_AES_GCM_encrypt_calc_aad_loop
  522. movdqu xmm8, [rsp]
  523. add rsp, 16
  524. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  525. pxor xmm6, xmm8
  526. pshufd xmm1, xmm6, 78
  527. pshufd xmm2, xmm5, 78
  528. movdqa xmm3, xmm5
  529. movdqa xmm0, xmm5
  530. pclmulqdq xmm3, xmm6, 17
  531. pclmulqdq xmm0, xmm6, 0
  532. pxor xmm1, xmm6
  533. pxor xmm2, xmm5
  534. pclmulqdq xmm1, xmm2, 0
  535. pxor xmm1, xmm0
  536. pxor xmm1, xmm3
  537. movdqa xmm2, xmm1
  538. movdqa xmm7, xmm0
  539. movdqa xmm6, xmm3
  540. pslldq xmm2, 8
  541. psrldq xmm1, 8
  542. pxor xmm7, xmm2
  543. pxor xmm6, xmm1
  544. movdqa xmm0, xmm7
  545. movdqa xmm1, xmm6
  546. psrld xmm0, 31
  547. psrld xmm1, 31
  548. pslld xmm7, 1
  549. pslld xmm6, 1
  550. movdqa xmm2, xmm0
  551. pslldq xmm0, 4
  552. psrldq xmm2, 12
  553. pslldq xmm1, 4
  554. por xmm6, xmm2
  555. por xmm7, xmm0
  556. por xmm6, xmm1
  557. movdqa xmm0, xmm7
  558. movdqa xmm1, xmm7
  559. movdqa xmm2, xmm7
  560. pslld xmm0, 31
  561. pslld xmm1, 30
  562. pslld xmm2, 25
  563. pxor xmm0, xmm1
  564. pxor xmm0, xmm2
  565. movdqa xmm1, xmm0
  566. psrldq xmm1, 4
  567. pslldq xmm0, 12
  568. pxor xmm7, xmm0
  569. movdqa xmm2, xmm7
  570. movdqa xmm3, xmm7
  571. movdqa xmm0, xmm7
  572. psrld xmm2, 1
  573. psrld xmm3, 2
  574. psrld xmm0, 7
  575. pxor xmm2, xmm3
  576. pxor xmm2, xmm0
  577. pxor xmm2, xmm1
  578. pxor xmm2, xmm7
  579. pxor xmm6, xmm2
  580. L_AES_GCM_encrypt_calc_aad_done:
  581. ; Calculate counter and H
  582. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  583. movdqa xmm9, xmm5
  584. paddd xmm4, OWORD PTR L_aes_gcm_one
  585. movdqa xmm8, xmm5
  586. movdqu [rsp+128], xmm4
  587. psrlq xmm9, 63
  588. psllq xmm8, 1
  589. pslldq xmm9, 8
  590. por xmm8, xmm9
  591. pshufd xmm5, xmm5, 255
  592. psrad xmm5, 31
  593. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  594. pxor xmm5, xmm8
  595. xor rbx, rbx
  596. cmp r9d, 128
  597. mov r13d, r9d
  598. jl L_AES_GCM_encrypt_done_128
  599. and r13d, 4294967168
  600. movdqa xmm2, xmm6
  601. ; H ^ 1
  602. movdqu [rsp], xmm5
  603. ; H ^ 2
  604. pshufd xmm9, xmm5, 78
  605. pshufd xmm10, xmm5, 78
  606. movdqa xmm11, xmm5
  607. movdqa xmm8, xmm5
  608. pclmulqdq xmm11, xmm5, 17
  609. pclmulqdq xmm8, xmm5, 0
  610. pxor xmm9, xmm5
  611. pxor xmm10, xmm5
  612. pclmulqdq xmm9, xmm10, 0
  613. pxor xmm9, xmm8
  614. pxor xmm9, xmm11
  615. movdqa xmm10, xmm9
  616. movdqa xmm0, xmm11
  617. pslldq xmm10, 8
  618. psrldq xmm9, 8
  619. pxor xmm8, xmm10
  620. pxor xmm0, xmm9
  621. movdqa xmm12, xmm8
  622. movdqa xmm13, xmm8
  623. movdqa xmm14, xmm8
  624. pslld xmm12, 31
  625. pslld xmm13, 30
  626. pslld xmm14, 25
  627. pxor xmm12, xmm13
  628. pxor xmm12, xmm14
  629. movdqa xmm13, xmm12
  630. psrldq xmm13, 4
  631. pslldq xmm12, 12
  632. pxor xmm8, xmm12
  633. movdqa xmm14, xmm8
  634. movdqa xmm10, xmm8
  635. movdqa xmm9, xmm8
  636. psrld xmm14, 1
  637. psrld xmm10, 2
  638. psrld xmm9, 7
  639. pxor xmm14, xmm10
  640. pxor xmm14, xmm9
  641. pxor xmm14, xmm13
  642. pxor xmm14, xmm8
  643. pxor xmm0, xmm14
  644. movdqu [rsp+16], xmm0
  645. ; H ^ 3
  646. pshufd xmm9, xmm5, 78
  647. pshufd xmm10, xmm0, 78
  648. movdqa xmm11, xmm0
  649. movdqa xmm8, xmm0
  650. pclmulqdq xmm11, xmm5, 17
  651. pclmulqdq xmm8, xmm5, 0
  652. pxor xmm9, xmm5
  653. pxor xmm10, xmm0
  654. pclmulqdq xmm9, xmm10, 0
  655. pxor xmm9, xmm8
  656. pxor xmm9, xmm11
  657. movdqa xmm10, xmm9
  658. movdqa xmm1, xmm11
  659. pslldq xmm10, 8
  660. psrldq xmm9, 8
  661. pxor xmm8, xmm10
  662. pxor xmm1, xmm9
  663. movdqa xmm12, xmm8
  664. movdqa xmm13, xmm8
  665. movdqa xmm14, xmm8
  666. pslld xmm12, 31
  667. pslld xmm13, 30
  668. pslld xmm14, 25
  669. pxor xmm12, xmm13
  670. pxor xmm12, xmm14
  671. movdqa xmm13, xmm12
  672. psrldq xmm13, 4
  673. pslldq xmm12, 12
  674. pxor xmm8, xmm12
  675. movdqa xmm14, xmm8
  676. movdqa xmm10, xmm8
  677. movdqa xmm9, xmm8
  678. psrld xmm14, 1
  679. psrld xmm10, 2
  680. psrld xmm9, 7
  681. pxor xmm14, xmm10
  682. pxor xmm14, xmm9
  683. pxor xmm14, xmm13
  684. pxor xmm14, xmm8
  685. pxor xmm1, xmm14
  686. movdqu [rsp+32], xmm1
  687. ; H ^ 4
  688. pshufd xmm9, xmm0, 78
  689. pshufd xmm10, xmm0, 78
  690. movdqa xmm11, xmm0
  691. movdqa xmm8, xmm0
  692. pclmulqdq xmm11, xmm0, 17
  693. pclmulqdq xmm8, xmm0, 0
  694. pxor xmm9, xmm0
  695. pxor xmm10, xmm0
  696. pclmulqdq xmm9, xmm10, 0
  697. pxor xmm9, xmm8
  698. pxor xmm9, xmm11
  699. movdqa xmm10, xmm9
  700. movdqa xmm3, xmm11
  701. pslldq xmm10, 8
  702. psrldq xmm9, 8
  703. pxor xmm8, xmm10
  704. pxor xmm3, xmm9
  705. movdqa xmm12, xmm8
  706. movdqa xmm13, xmm8
  707. movdqa xmm14, xmm8
  708. pslld xmm12, 31
  709. pslld xmm13, 30
  710. pslld xmm14, 25
  711. pxor xmm12, xmm13
  712. pxor xmm12, xmm14
  713. movdqa xmm13, xmm12
  714. psrldq xmm13, 4
  715. pslldq xmm12, 12
  716. pxor xmm8, xmm12
  717. movdqa xmm14, xmm8
  718. movdqa xmm10, xmm8
  719. movdqa xmm9, xmm8
  720. psrld xmm14, 1
  721. psrld xmm10, 2
  722. psrld xmm9, 7
  723. pxor xmm14, xmm10
  724. pxor xmm14, xmm9
  725. pxor xmm14, xmm13
  726. pxor xmm14, xmm8
  727. pxor xmm3, xmm14
  728. movdqu [rsp+48], xmm3
  729. ; H ^ 5
  730. pshufd xmm9, xmm0, 78
  731. pshufd xmm10, xmm1, 78
  732. movdqa xmm11, xmm1
  733. movdqa xmm8, xmm1
  734. pclmulqdq xmm11, xmm0, 17
  735. pclmulqdq xmm8, xmm0, 0
  736. pxor xmm9, xmm0
  737. pxor xmm10, xmm1
  738. pclmulqdq xmm9, xmm10, 0
  739. pxor xmm9, xmm8
  740. pxor xmm9, xmm11
  741. movdqa xmm10, xmm9
  742. movdqa xmm7, xmm11
  743. pslldq xmm10, 8
  744. psrldq xmm9, 8
  745. pxor xmm8, xmm10
  746. pxor xmm7, xmm9
  747. movdqa xmm12, xmm8
  748. movdqa xmm13, xmm8
  749. movdqa xmm14, xmm8
  750. pslld xmm12, 31
  751. pslld xmm13, 30
  752. pslld xmm14, 25
  753. pxor xmm12, xmm13
  754. pxor xmm12, xmm14
  755. movdqa xmm13, xmm12
  756. psrldq xmm13, 4
  757. pslldq xmm12, 12
  758. pxor xmm8, xmm12
  759. movdqa xmm14, xmm8
  760. movdqa xmm10, xmm8
  761. movdqa xmm9, xmm8
  762. psrld xmm14, 1
  763. psrld xmm10, 2
  764. psrld xmm9, 7
  765. pxor xmm14, xmm10
  766. pxor xmm14, xmm9
  767. pxor xmm14, xmm13
  768. pxor xmm14, xmm8
  769. pxor xmm7, xmm14
  770. movdqu [rsp+64], xmm7
  771. ; H ^ 6
  772. pshufd xmm9, xmm1, 78
  773. pshufd xmm10, xmm1, 78
  774. movdqa xmm11, xmm1
  775. movdqa xmm8, xmm1
  776. pclmulqdq xmm11, xmm1, 17
  777. pclmulqdq xmm8, xmm1, 0
  778. pxor xmm9, xmm1
  779. pxor xmm10, xmm1
  780. pclmulqdq xmm9, xmm10, 0
  781. pxor xmm9, xmm8
  782. pxor xmm9, xmm11
  783. movdqa xmm10, xmm9
  784. movdqa xmm7, xmm11
  785. pslldq xmm10, 8
  786. psrldq xmm9, 8
  787. pxor xmm8, xmm10
  788. pxor xmm7, xmm9
  789. movdqa xmm12, xmm8
  790. movdqa xmm13, xmm8
  791. movdqa xmm14, xmm8
  792. pslld xmm12, 31
  793. pslld xmm13, 30
  794. pslld xmm14, 25
  795. pxor xmm12, xmm13
  796. pxor xmm12, xmm14
  797. movdqa xmm13, xmm12
  798. psrldq xmm13, 4
  799. pslldq xmm12, 12
  800. pxor xmm8, xmm12
  801. movdqa xmm14, xmm8
  802. movdqa xmm10, xmm8
  803. movdqa xmm9, xmm8
  804. psrld xmm14, 1
  805. psrld xmm10, 2
  806. psrld xmm9, 7
  807. pxor xmm14, xmm10
  808. pxor xmm14, xmm9
  809. pxor xmm14, xmm13
  810. pxor xmm14, xmm8
  811. pxor xmm7, xmm14
  812. movdqu [rsp+80], xmm7
  813. ; H ^ 7
  814. pshufd xmm9, xmm1, 78
  815. pshufd xmm10, xmm3, 78
  816. movdqa xmm11, xmm3
  817. movdqa xmm8, xmm3
  818. pclmulqdq xmm11, xmm1, 17
  819. pclmulqdq xmm8, xmm1, 0
  820. pxor xmm9, xmm1
  821. pxor xmm10, xmm3
  822. pclmulqdq xmm9, xmm10, 0
  823. pxor xmm9, xmm8
  824. pxor xmm9, xmm11
  825. movdqa xmm10, xmm9
  826. movdqa xmm7, xmm11
  827. pslldq xmm10, 8
  828. psrldq xmm9, 8
  829. pxor xmm8, xmm10
  830. pxor xmm7, xmm9
  831. movdqa xmm12, xmm8
  832. movdqa xmm13, xmm8
  833. movdqa xmm14, xmm8
  834. pslld xmm12, 31
  835. pslld xmm13, 30
  836. pslld xmm14, 25
  837. pxor xmm12, xmm13
  838. pxor xmm12, xmm14
  839. movdqa xmm13, xmm12
  840. psrldq xmm13, 4
  841. pslldq xmm12, 12
  842. pxor xmm8, xmm12
  843. movdqa xmm14, xmm8
  844. movdqa xmm10, xmm8
  845. movdqa xmm9, xmm8
  846. psrld xmm14, 1
  847. psrld xmm10, 2
  848. psrld xmm9, 7
  849. pxor xmm14, xmm10
  850. pxor xmm14, xmm9
  851. pxor xmm14, xmm13
  852. pxor xmm14, xmm8
  853. pxor xmm7, xmm14
  854. movdqu [rsp+96], xmm7
  855. ; H ^ 8
  856. pshufd xmm9, xmm3, 78
  857. pshufd xmm10, xmm3, 78
  858. movdqa xmm11, xmm3
  859. movdqa xmm8, xmm3
  860. pclmulqdq xmm11, xmm3, 17
  861. pclmulqdq xmm8, xmm3, 0
  862. pxor xmm9, xmm3
  863. pxor xmm10, xmm3
  864. pclmulqdq xmm9, xmm10, 0
  865. pxor xmm9, xmm8
  866. pxor xmm9, xmm11
  867. movdqa xmm10, xmm9
  868. movdqa xmm7, xmm11
  869. pslldq xmm10, 8
  870. psrldq xmm9, 8
  871. pxor xmm8, xmm10
  872. pxor xmm7, xmm9
  873. movdqa xmm12, xmm8
  874. movdqa xmm13, xmm8
  875. movdqa xmm14, xmm8
  876. pslld xmm12, 31
  877. pslld xmm13, 30
  878. pslld xmm14, 25
  879. pxor xmm12, xmm13
  880. pxor xmm12, xmm14
  881. movdqa xmm13, xmm12
  882. psrldq xmm13, 4
  883. pslldq xmm12, 12
  884. pxor xmm8, xmm12
  885. movdqa xmm14, xmm8
  886. movdqa xmm10, xmm8
  887. movdqa xmm9, xmm8
  888. psrld xmm14, 1
  889. psrld xmm10, 2
  890. psrld xmm9, 7
  891. pxor xmm14, xmm10
  892. pxor xmm14, xmm9
  893. pxor xmm14, xmm13
  894. pxor xmm14, xmm8
  895. pxor xmm7, xmm14
  896. movdqu [rsp+112], xmm7
  897. ; First 128 bytes of input
  898. movdqu xmm8, [rsp+128]
  899. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  900. movdqa xmm0, xmm8
  901. pshufb xmm8, xmm1
  902. movdqa xmm9, xmm0
  903. paddd xmm9, OWORD PTR L_aes_gcm_one
  904. pshufb xmm9, xmm1
  905. movdqa xmm10, xmm0
  906. paddd xmm10, OWORD PTR L_aes_gcm_two
  907. pshufb xmm10, xmm1
  908. movdqa xmm11, xmm0
  909. paddd xmm11, OWORD PTR L_aes_gcm_three
  910. pshufb xmm11, xmm1
  911. movdqa xmm12, xmm0
  912. paddd xmm12, OWORD PTR L_aes_gcm_four
  913. pshufb xmm12, xmm1
  914. movdqa xmm13, xmm0
  915. paddd xmm13, OWORD PTR L_aes_gcm_five
  916. pshufb xmm13, xmm1
  917. movdqa xmm14, xmm0
  918. paddd xmm14, OWORD PTR L_aes_gcm_six
  919. pshufb xmm14, xmm1
  920. movdqa xmm15, xmm0
  921. paddd xmm15, OWORD PTR L_aes_gcm_seven
  922. pshufb xmm15, xmm1
  923. paddd xmm0, OWORD PTR L_aes_gcm_eight
  924. movdqa xmm7, OWORD PTR [r15]
  925. movdqu [rsp+128], xmm0
  926. pxor xmm8, xmm7
  927. pxor xmm9, xmm7
  928. pxor xmm10, xmm7
  929. pxor xmm11, xmm7
  930. pxor xmm12, xmm7
  931. pxor xmm13, xmm7
  932. pxor xmm14, xmm7
  933. pxor xmm15, xmm7
  934. movdqa xmm7, OWORD PTR [r15+16]
  935. aesenc xmm8, xmm7
  936. aesenc xmm9, xmm7
  937. aesenc xmm10, xmm7
  938. aesenc xmm11, xmm7
  939. aesenc xmm12, xmm7
  940. aesenc xmm13, xmm7
  941. aesenc xmm14, xmm7
  942. aesenc xmm15, xmm7
  943. movdqa xmm7, OWORD PTR [r15+32]
  944. aesenc xmm8, xmm7
  945. aesenc xmm9, xmm7
  946. aesenc xmm10, xmm7
  947. aesenc xmm11, xmm7
  948. aesenc xmm12, xmm7
  949. aesenc xmm13, xmm7
  950. aesenc xmm14, xmm7
  951. aesenc xmm15, xmm7
  952. movdqa xmm7, OWORD PTR [r15+48]
  953. aesenc xmm8, xmm7
  954. aesenc xmm9, xmm7
  955. aesenc xmm10, xmm7
  956. aesenc xmm11, xmm7
  957. aesenc xmm12, xmm7
  958. aesenc xmm13, xmm7
  959. aesenc xmm14, xmm7
  960. aesenc xmm15, xmm7
  961. movdqa xmm7, OWORD PTR [r15+64]
  962. aesenc xmm8, xmm7
  963. aesenc xmm9, xmm7
  964. aesenc xmm10, xmm7
  965. aesenc xmm11, xmm7
  966. aesenc xmm12, xmm7
  967. aesenc xmm13, xmm7
  968. aesenc xmm14, xmm7
  969. aesenc xmm15, xmm7
  970. movdqa xmm7, OWORD PTR [r15+80]
  971. aesenc xmm8, xmm7
  972. aesenc xmm9, xmm7
  973. aesenc xmm10, xmm7
  974. aesenc xmm11, xmm7
  975. aesenc xmm12, xmm7
  976. aesenc xmm13, xmm7
  977. aesenc xmm14, xmm7
  978. aesenc xmm15, xmm7
  979. movdqa xmm7, OWORD PTR [r15+96]
  980. aesenc xmm8, xmm7
  981. aesenc xmm9, xmm7
  982. aesenc xmm10, xmm7
  983. aesenc xmm11, xmm7
  984. aesenc xmm12, xmm7
  985. aesenc xmm13, xmm7
  986. aesenc xmm14, xmm7
  987. aesenc xmm15, xmm7
  988. movdqa xmm7, OWORD PTR [r15+112]
  989. aesenc xmm8, xmm7
  990. aesenc xmm9, xmm7
  991. aesenc xmm10, xmm7
  992. aesenc xmm11, xmm7
  993. aesenc xmm12, xmm7
  994. aesenc xmm13, xmm7
  995. aesenc xmm14, xmm7
  996. aesenc xmm15, xmm7
  997. movdqa xmm7, OWORD PTR [r15+128]
  998. aesenc xmm8, xmm7
  999. aesenc xmm9, xmm7
  1000. aesenc xmm10, xmm7
  1001. aesenc xmm11, xmm7
  1002. aesenc xmm12, xmm7
  1003. aesenc xmm13, xmm7
  1004. aesenc xmm14, xmm7
  1005. aesenc xmm15, xmm7
  1006. movdqa xmm7, OWORD PTR [r15+144]
  1007. aesenc xmm8, xmm7
  1008. aesenc xmm9, xmm7
  1009. aesenc xmm10, xmm7
  1010. aesenc xmm11, xmm7
  1011. aesenc xmm12, xmm7
  1012. aesenc xmm13, xmm7
  1013. aesenc xmm14, xmm7
  1014. aesenc xmm15, xmm7
  1015. cmp r10d, 11
  1016. movdqa xmm7, OWORD PTR [r15+160]
  1017. jl L_AES_GCM_encrypt_enc_done
  1018. aesenc xmm8, xmm7
  1019. aesenc xmm9, xmm7
  1020. aesenc xmm10, xmm7
  1021. aesenc xmm11, xmm7
  1022. aesenc xmm12, xmm7
  1023. aesenc xmm13, xmm7
  1024. aesenc xmm14, xmm7
  1025. aesenc xmm15, xmm7
  1026. movdqa xmm7, OWORD PTR [r15+176]
  1027. aesenc xmm8, xmm7
  1028. aesenc xmm9, xmm7
  1029. aesenc xmm10, xmm7
  1030. aesenc xmm11, xmm7
  1031. aesenc xmm12, xmm7
  1032. aesenc xmm13, xmm7
  1033. aesenc xmm14, xmm7
  1034. aesenc xmm15, xmm7
  1035. cmp r10d, 13
  1036. movdqa xmm7, OWORD PTR [r15+192]
  1037. jl L_AES_GCM_encrypt_enc_done
  1038. aesenc xmm8, xmm7
  1039. aesenc xmm9, xmm7
  1040. aesenc xmm10, xmm7
  1041. aesenc xmm11, xmm7
  1042. aesenc xmm12, xmm7
  1043. aesenc xmm13, xmm7
  1044. aesenc xmm14, xmm7
  1045. aesenc xmm15, xmm7
  1046. movdqa xmm7, OWORD PTR [r15+208]
  1047. aesenc xmm8, xmm7
  1048. aesenc xmm9, xmm7
  1049. aesenc xmm10, xmm7
  1050. aesenc xmm11, xmm7
  1051. aesenc xmm12, xmm7
  1052. aesenc xmm13, xmm7
  1053. aesenc xmm14, xmm7
  1054. aesenc xmm15, xmm7
  1055. movdqa xmm7, OWORD PTR [r15+224]
  1056. L_AES_GCM_encrypt_enc_done:
  1057. aesenclast xmm8, xmm7
  1058. aesenclast xmm9, xmm7
  1059. movdqu xmm0, [rdi]
  1060. movdqu xmm1, [rdi+16]
  1061. pxor xmm8, xmm0
  1062. pxor xmm9, xmm1
  1063. movdqu [rsi], xmm8
  1064. movdqu [rsi+16], xmm9
  1065. aesenclast xmm10, xmm7
  1066. aesenclast xmm11, xmm7
  1067. movdqu xmm0, [rdi+32]
  1068. movdqu xmm1, [rdi+48]
  1069. pxor xmm10, xmm0
  1070. pxor xmm11, xmm1
  1071. movdqu [rsi+32], xmm10
  1072. movdqu [rsi+48], xmm11
  1073. aesenclast xmm12, xmm7
  1074. aesenclast xmm13, xmm7
  1075. movdqu xmm0, [rdi+64]
  1076. movdqu xmm1, [rdi+80]
  1077. pxor xmm12, xmm0
  1078. pxor xmm13, xmm1
  1079. movdqu [rsi+64], xmm12
  1080. movdqu [rsi+80], xmm13
  1081. aesenclast xmm14, xmm7
  1082. aesenclast xmm15, xmm7
  1083. movdqu xmm0, [rdi+96]
  1084. movdqu xmm1, [rdi+112]
  1085. pxor xmm14, xmm0
  1086. pxor xmm15, xmm1
  1087. movdqu [rsi+96], xmm14
  1088. movdqu [rsi+112], xmm15
  1089. cmp r13d, 128
  1090. mov ebx, 128
  1091. jle L_AES_GCM_encrypt_end_128
  1092. ; More 128 bytes of input
  1093. L_AES_GCM_encrypt_ghash_128:
  1094. lea rcx, QWORD PTR [rdi+rbx]
  1095. lea rdx, QWORD PTR [rsi+rbx]
  1096. movdqu xmm8, [rsp+128]
  1097. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  1098. movdqa xmm0, xmm8
  1099. pshufb xmm8, xmm1
  1100. movdqa xmm9, xmm0
  1101. paddd xmm9, OWORD PTR L_aes_gcm_one
  1102. pshufb xmm9, xmm1
  1103. movdqa xmm10, xmm0
  1104. paddd xmm10, OWORD PTR L_aes_gcm_two
  1105. pshufb xmm10, xmm1
  1106. movdqa xmm11, xmm0
  1107. paddd xmm11, OWORD PTR L_aes_gcm_three
  1108. pshufb xmm11, xmm1
  1109. movdqa xmm12, xmm0
  1110. paddd xmm12, OWORD PTR L_aes_gcm_four
  1111. pshufb xmm12, xmm1
  1112. movdqa xmm13, xmm0
  1113. paddd xmm13, OWORD PTR L_aes_gcm_five
  1114. pshufb xmm13, xmm1
  1115. movdqa xmm14, xmm0
  1116. paddd xmm14, OWORD PTR L_aes_gcm_six
  1117. pshufb xmm14, xmm1
  1118. movdqa xmm15, xmm0
  1119. paddd xmm15, OWORD PTR L_aes_gcm_seven
  1120. pshufb xmm15, xmm1
  1121. paddd xmm0, OWORD PTR L_aes_gcm_eight
  1122. movdqa xmm7, OWORD PTR [r15]
  1123. movdqu [rsp+128], xmm0
  1124. pxor xmm8, xmm7
  1125. pxor xmm9, xmm7
  1126. pxor xmm10, xmm7
  1127. pxor xmm11, xmm7
  1128. pxor xmm12, xmm7
  1129. pxor xmm13, xmm7
  1130. pxor xmm14, xmm7
  1131. pxor xmm15, xmm7
  1132. movdqu xmm7, [rsp+112]
  1133. movdqu xmm0, [rdx+-128]
  1134. aesenc xmm8, [r15+16]
  1135. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1136. pxor xmm0, xmm2
  1137. pshufd xmm1, xmm7, 78
  1138. pshufd xmm5, xmm0, 78
  1139. pxor xmm1, xmm7
  1140. pxor xmm5, xmm0
  1141. movdqa xmm3, xmm0
  1142. pclmulqdq xmm3, xmm7, 17
  1143. aesenc xmm9, [r15+16]
  1144. aesenc xmm10, [r15+16]
  1145. movdqa xmm2, xmm0
  1146. pclmulqdq xmm2, xmm7, 0
  1147. aesenc xmm11, [r15+16]
  1148. aesenc xmm12, [r15+16]
  1149. pclmulqdq xmm1, xmm5, 0
  1150. aesenc xmm13, [r15+16]
  1151. aesenc xmm14, [r15+16]
  1152. aesenc xmm15, [r15+16]
  1153. pxor xmm1, xmm2
  1154. pxor xmm1, xmm3
  1155. movdqu xmm7, [rsp+96]
  1156. movdqu xmm0, [rdx+-112]
  1157. pshufd xmm4, xmm7, 78
  1158. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1159. aesenc xmm8, [r15+32]
  1160. pxor xmm4, xmm7
  1161. pshufd xmm5, xmm0, 78
  1162. pxor xmm5, xmm0
  1163. movdqa xmm6, xmm0
  1164. pclmulqdq xmm6, xmm7, 17
  1165. aesenc xmm9, [r15+32]
  1166. aesenc xmm10, [r15+32]
  1167. pclmulqdq xmm7, xmm0, 0
  1168. aesenc xmm11, [r15+32]
  1169. aesenc xmm12, [r15+32]
  1170. pclmulqdq xmm4, xmm5, 0
  1171. aesenc xmm13, [r15+32]
  1172. aesenc xmm14, [r15+32]
  1173. aesenc xmm15, [r15+32]
  1174. pxor xmm1, xmm7
  1175. pxor xmm2, xmm7
  1176. pxor xmm1, xmm6
  1177. pxor xmm3, xmm6
  1178. pxor xmm1, xmm4
  1179. movdqu xmm7, [rsp+80]
  1180. movdqu xmm0, [rdx+-96]
  1181. pshufd xmm4, xmm7, 78
  1182. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1183. aesenc xmm8, [r15+48]
  1184. pxor xmm4, xmm7
  1185. pshufd xmm5, xmm0, 78
  1186. pxor xmm5, xmm0
  1187. movdqa xmm6, xmm0
  1188. pclmulqdq xmm6, xmm7, 17
  1189. aesenc xmm9, [r15+48]
  1190. aesenc xmm10, [r15+48]
  1191. pclmulqdq xmm7, xmm0, 0
  1192. aesenc xmm11, [r15+48]
  1193. aesenc xmm12, [r15+48]
  1194. pclmulqdq xmm4, xmm5, 0
  1195. aesenc xmm13, [r15+48]
  1196. aesenc xmm14, [r15+48]
  1197. aesenc xmm15, [r15+48]
  1198. pxor xmm1, xmm7
  1199. pxor xmm2, xmm7
  1200. pxor xmm1, xmm6
  1201. pxor xmm3, xmm6
  1202. pxor xmm1, xmm4
  1203. movdqu xmm7, [rsp+64]
  1204. movdqu xmm0, [rdx+-80]
  1205. pshufd xmm4, xmm7, 78
  1206. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1207. aesenc xmm8, [r15+64]
  1208. pxor xmm4, xmm7
  1209. pshufd xmm5, xmm0, 78
  1210. pxor xmm5, xmm0
  1211. movdqa xmm6, xmm0
  1212. pclmulqdq xmm6, xmm7, 17
  1213. aesenc xmm9, [r15+64]
  1214. aesenc xmm10, [r15+64]
  1215. pclmulqdq xmm7, xmm0, 0
  1216. aesenc xmm11, [r15+64]
  1217. aesenc xmm12, [r15+64]
  1218. pclmulqdq xmm4, xmm5, 0
  1219. aesenc xmm13, [r15+64]
  1220. aesenc xmm14, [r15+64]
  1221. aesenc xmm15, [r15+64]
  1222. pxor xmm1, xmm7
  1223. pxor xmm2, xmm7
  1224. pxor xmm1, xmm6
  1225. pxor xmm3, xmm6
  1226. pxor xmm1, xmm4
  1227. movdqu xmm7, [rsp+48]
  1228. movdqu xmm0, [rdx+-64]
  1229. pshufd xmm4, xmm7, 78
  1230. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1231. aesenc xmm8, [r15+80]
  1232. pxor xmm4, xmm7
  1233. pshufd xmm5, xmm0, 78
  1234. pxor xmm5, xmm0
  1235. movdqa xmm6, xmm0
  1236. pclmulqdq xmm6, xmm7, 17
  1237. aesenc xmm9, [r15+80]
  1238. aesenc xmm10, [r15+80]
  1239. pclmulqdq xmm7, xmm0, 0
  1240. aesenc xmm11, [r15+80]
  1241. aesenc xmm12, [r15+80]
  1242. pclmulqdq xmm4, xmm5, 0
  1243. aesenc xmm13, [r15+80]
  1244. aesenc xmm14, [r15+80]
  1245. aesenc xmm15, [r15+80]
  1246. pxor xmm1, xmm7
  1247. pxor xmm2, xmm7
  1248. pxor xmm1, xmm6
  1249. pxor xmm3, xmm6
  1250. pxor xmm1, xmm4
  1251. movdqu xmm7, [rsp+32]
  1252. movdqu xmm0, [rdx+-48]
  1253. pshufd xmm4, xmm7, 78
  1254. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1255. aesenc xmm8, [r15+96]
  1256. pxor xmm4, xmm7
  1257. pshufd xmm5, xmm0, 78
  1258. pxor xmm5, xmm0
  1259. movdqa xmm6, xmm0
  1260. pclmulqdq xmm6, xmm7, 17
  1261. aesenc xmm9, [r15+96]
  1262. aesenc xmm10, [r15+96]
  1263. pclmulqdq xmm7, xmm0, 0
  1264. aesenc xmm11, [r15+96]
  1265. aesenc xmm12, [r15+96]
  1266. pclmulqdq xmm4, xmm5, 0
  1267. aesenc xmm13, [r15+96]
  1268. aesenc xmm14, [r15+96]
  1269. aesenc xmm15, [r15+96]
  1270. pxor xmm1, xmm7
  1271. pxor xmm2, xmm7
  1272. pxor xmm1, xmm6
  1273. pxor xmm3, xmm6
  1274. pxor xmm1, xmm4
  1275. movdqu xmm7, [rsp+16]
  1276. movdqu xmm0, [rdx+-32]
  1277. pshufd xmm4, xmm7, 78
  1278. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1279. aesenc xmm8, [r15+112]
  1280. pxor xmm4, xmm7
  1281. pshufd xmm5, xmm0, 78
  1282. pxor xmm5, xmm0
  1283. movdqa xmm6, xmm0
  1284. pclmulqdq xmm6, xmm7, 17
  1285. aesenc xmm9, [r15+112]
  1286. aesenc xmm10, [r15+112]
  1287. pclmulqdq xmm7, xmm0, 0
  1288. aesenc xmm11, [r15+112]
  1289. aesenc xmm12, [r15+112]
  1290. pclmulqdq xmm4, xmm5, 0
  1291. aesenc xmm13, [r15+112]
  1292. aesenc xmm14, [r15+112]
  1293. aesenc xmm15, [r15+112]
  1294. pxor xmm1, xmm7
  1295. pxor xmm2, xmm7
  1296. pxor xmm1, xmm6
  1297. pxor xmm3, xmm6
  1298. pxor xmm1, xmm4
  1299. movdqu xmm7, [rsp]
  1300. movdqu xmm0, [rdx+-16]
  1301. pshufd xmm4, xmm7, 78
  1302. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  1303. aesenc xmm8, [r15+128]
  1304. pxor xmm4, xmm7
  1305. pshufd xmm5, xmm0, 78
  1306. pxor xmm5, xmm0
  1307. movdqa xmm6, xmm0
  1308. pclmulqdq xmm6, xmm7, 17
  1309. aesenc xmm9, [r15+128]
  1310. aesenc xmm10, [r15+128]
  1311. pclmulqdq xmm7, xmm0, 0
  1312. aesenc xmm11, [r15+128]
  1313. aesenc xmm12, [r15+128]
  1314. pclmulqdq xmm4, xmm5, 0
  1315. aesenc xmm13, [r15+128]
  1316. aesenc xmm14, [r15+128]
  1317. aesenc xmm15, [r15+128]
  1318. pxor xmm1, xmm7
  1319. pxor xmm2, xmm7
  1320. pxor xmm1, xmm6
  1321. pxor xmm3, xmm6
  1322. pxor xmm1, xmm4
  1323. movdqa xmm5, xmm1
  1324. psrldq xmm1, 8
  1325. pslldq xmm5, 8
  1326. aesenc xmm8, [r15+144]
  1327. pxor xmm2, xmm5
  1328. pxor xmm3, xmm1
  1329. movdqa xmm7, xmm2
  1330. movdqa xmm4, xmm2
  1331. movdqa xmm5, xmm2
  1332. aesenc xmm9, [r15+144]
  1333. pslld xmm7, 31
  1334. pslld xmm4, 30
  1335. pslld xmm5, 25
  1336. aesenc xmm10, [r15+144]
  1337. pxor xmm7, xmm4
  1338. pxor xmm7, xmm5
  1339. aesenc xmm11, [r15+144]
  1340. movdqa xmm4, xmm7
  1341. pslldq xmm7, 12
  1342. psrldq xmm4, 4
  1343. aesenc xmm12, [r15+144]
  1344. pxor xmm2, xmm7
  1345. movdqa xmm5, xmm2
  1346. movdqa xmm1, xmm2
  1347. movdqa xmm0, xmm2
  1348. aesenc xmm13, [r15+144]
  1349. psrld xmm5, 1
  1350. psrld xmm1, 2
  1351. psrld xmm0, 7
  1352. aesenc xmm14, [r15+144]
  1353. pxor xmm5, xmm1
  1354. pxor xmm5, xmm0
  1355. aesenc xmm15, [r15+144]
  1356. pxor xmm5, xmm4
  1357. pxor xmm2, xmm5
  1358. pxor xmm2, xmm3
  1359. cmp r10d, 11
  1360. movdqa xmm7, OWORD PTR [r15+160]
  1361. jl L_AES_GCM_encrypt_aesenc_128_ghash_avx_done
  1362. aesenc xmm8, xmm7
  1363. aesenc xmm9, xmm7
  1364. aesenc xmm10, xmm7
  1365. aesenc xmm11, xmm7
  1366. aesenc xmm12, xmm7
  1367. aesenc xmm13, xmm7
  1368. aesenc xmm14, xmm7
  1369. aesenc xmm15, xmm7
  1370. movdqa xmm7, OWORD PTR [r15+176]
  1371. aesenc xmm8, xmm7
  1372. aesenc xmm9, xmm7
  1373. aesenc xmm10, xmm7
  1374. aesenc xmm11, xmm7
  1375. aesenc xmm12, xmm7
  1376. aesenc xmm13, xmm7
  1377. aesenc xmm14, xmm7
  1378. aesenc xmm15, xmm7
  1379. cmp r10d, 13
  1380. movdqa xmm7, OWORD PTR [r15+192]
  1381. jl L_AES_GCM_encrypt_aesenc_128_ghash_avx_done
  1382. aesenc xmm8, xmm7
  1383. aesenc xmm9, xmm7
  1384. aesenc xmm10, xmm7
  1385. aesenc xmm11, xmm7
  1386. aesenc xmm12, xmm7
  1387. aesenc xmm13, xmm7
  1388. aesenc xmm14, xmm7
  1389. aesenc xmm15, xmm7
  1390. movdqa xmm7, OWORD PTR [r15+208]
  1391. aesenc xmm8, xmm7
  1392. aesenc xmm9, xmm7
  1393. aesenc xmm10, xmm7
  1394. aesenc xmm11, xmm7
  1395. aesenc xmm12, xmm7
  1396. aesenc xmm13, xmm7
  1397. aesenc xmm14, xmm7
  1398. aesenc xmm15, xmm7
  1399. movdqa xmm7, OWORD PTR [r15+224]
  1400. L_AES_GCM_encrypt_aesenc_128_ghash_avx_done:
  1401. aesenclast xmm8, xmm7
  1402. aesenclast xmm9, xmm7
  1403. movdqu xmm0, [rcx]
  1404. movdqu xmm1, [rcx+16]
  1405. pxor xmm8, xmm0
  1406. pxor xmm9, xmm1
  1407. movdqu [rdx], xmm8
  1408. movdqu [rdx+16], xmm9
  1409. aesenclast xmm10, xmm7
  1410. aesenclast xmm11, xmm7
  1411. movdqu xmm0, [rcx+32]
  1412. movdqu xmm1, [rcx+48]
  1413. pxor xmm10, xmm0
  1414. pxor xmm11, xmm1
  1415. movdqu [rdx+32], xmm10
  1416. movdqu [rdx+48], xmm11
  1417. aesenclast xmm12, xmm7
  1418. aesenclast xmm13, xmm7
  1419. movdqu xmm0, [rcx+64]
  1420. movdqu xmm1, [rcx+80]
  1421. pxor xmm12, xmm0
  1422. pxor xmm13, xmm1
  1423. movdqu [rdx+64], xmm12
  1424. movdqu [rdx+80], xmm13
  1425. aesenclast xmm14, xmm7
  1426. aesenclast xmm15, xmm7
  1427. movdqu xmm0, [rcx+96]
  1428. movdqu xmm1, [rcx+112]
  1429. pxor xmm14, xmm0
  1430. pxor xmm15, xmm1
  1431. movdqu [rdx+96], xmm14
  1432. movdqu [rdx+112], xmm15
  1433. add ebx, 128
  1434. cmp ebx, r13d
  1435. jl L_AES_GCM_encrypt_ghash_128
  1436. L_AES_GCM_encrypt_end_128:
  1437. movdqa xmm4, OWORD PTR L_aes_gcm_bswap_mask
  1438. pshufb xmm8, xmm4
  1439. pshufb xmm9, xmm4
  1440. pshufb xmm10, xmm4
  1441. pshufb xmm11, xmm4
  1442. pxor xmm8, xmm2
  1443. pshufb xmm12, xmm4
  1444. pshufb xmm13, xmm4
  1445. pshufb xmm14, xmm4
  1446. pshufb xmm15, xmm4
  1447. movdqu xmm7, [rsp+112]
  1448. pshufd xmm1, xmm8, 78
  1449. pshufd xmm2, xmm7, 78
  1450. movdqa xmm3, xmm7
  1451. movdqa xmm0, xmm7
  1452. pclmulqdq xmm3, xmm8, 17
  1453. pclmulqdq xmm0, xmm8, 0
  1454. pxor xmm1, xmm8
  1455. pxor xmm2, xmm7
  1456. pclmulqdq xmm1, xmm2, 0
  1457. pxor xmm1, xmm0
  1458. pxor xmm1, xmm3
  1459. movdqa xmm2, xmm1
  1460. movdqa xmm4, xmm0
  1461. movdqa xmm6, xmm3
  1462. pslldq xmm2, 8
  1463. psrldq xmm1, 8
  1464. pxor xmm4, xmm2
  1465. pxor xmm6, xmm1
  1466. movdqu xmm7, [rsp+96]
  1467. pshufd xmm1, xmm9, 78
  1468. pshufd xmm2, xmm7, 78
  1469. movdqa xmm3, xmm7
  1470. movdqa xmm0, xmm7
  1471. pclmulqdq xmm3, xmm9, 17
  1472. pclmulqdq xmm0, xmm9, 0
  1473. pxor xmm1, xmm9
  1474. pxor xmm2, xmm7
  1475. pclmulqdq xmm1, xmm2, 0
  1476. pxor xmm1, xmm0
  1477. pxor xmm1, xmm3
  1478. movdqa xmm2, xmm1
  1479. pxor xmm4, xmm0
  1480. pxor xmm6, xmm3
  1481. pslldq xmm2, 8
  1482. psrldq xmm1, 8
  1483. pxor xmm4, xmm2
  1484. pxor xmm6, xmm1
  1485. movdqu xmm7, [rsp+80]
  1486. pshufd xmm1, xmm10, 78
  1487. pshufd xmm2, xmm7, 78
  1488. movdqa xmm3, xmm7
  1489. movdqa xmm0, xmm7
  1490. pclmulqdq xmm3, xmm10, 17
  1491. pclmulqdq xmm0, xmm10, 0
  1492. pxor xmm1, xmm10
  1493. pxor xmm2, xmm7
  1494. pclmulqdq xmm1, xmm2, 0
  1495. pxor xmm1, xmm0
  1496. pxor xmm1, xmm3
  1497. movdqa xmm2, xmm1
  1498. pxor xmm4, xmm0
  1499. pxor xmm6, xmm3
  1500. pslldq xmm2, 8
  1501. psrldq xmm1, 8
  1502. pxor xmm4, xmm2
  1503. pxor xmm6, xmm1
  1504. movdqu xmm7, [rsp+64]
  1505. pshufd xmm1, xmm11, 78
  1506. pshufd xmm2, xmm7, 78
  1507. movdqa xmm3, xmm7
  1508. movdqa xmm0, xmm7
  1509. pclmulqdq xmm3, xmm11, 17
  1510. pclmulqdq xmm0, xmm11, 0
  1511. pxor xmm1, xmm11
  1512. pxor xmm2, xmm7
  1513. pclmulqdq xmm1, xmm2, 0
  1514. pxor xmm1, xmm0
  1515. pxor xmm1, xmm3
  1516. movdqa xmm2, xmm1
  1517. pxor xmm4, xmm0
  1518. pxor xmm6, xmm3
  1519. pslldq xmm2, 8
  1520. psrldq xmm1, 8
  1521. pxor xmm4, xmm2
  1522. pxor xmm6, xmm1
  1523. movdqu xmm7, [rsp+48]
  1524. pshufd xmm1, xmm12, 78
  1525. pshufd xmm2, xmm7, 78
  1526. movdqa xmm3, xmm7
  1527. movdqa xmm0, xmm7
  1528. pclmulqdq xmm3, xmm12, 17
  1529. pclmulqdq xmm0, xmm12, 0
  1530. pxor xmm1, xmm12
  1531. pxor xmm2, xmm7
  1532. pclmulqdq xmm1, xmm2, 0
  1533. pxor xmm1, xmm0
  1534. pxor xmm1, xmm3
  1535. movdqa xmm2, xmm1
  1536. pxor xmm4, xmm0
  1537. pxor xmm6, xmm3
  1538. pslldq xmm2, 8
  1539. psrldq xmm1, 8
  1540. pxor xmm4, xmm2
  1541. pxor xmm6, xmm1
  1542. movdqu xmm7, [rsp+32]
  1543. pshufd xmm1, xmm13, 78
  1544. pshufd xmm2, xmm7, 78
  1545. movdqa xmm3, xmm7
  1546. movdqa xmm0, xmm7
  1547. pclmulqdq xmm3, xmm13, 17
  1548. pclmulqdq xmm0, xmm13, 0
  1549. pxor xmm1, xmm13
  1550. pxor xmm2, xmm7
  1551. pclmulqdq xmm1, xmm2, 0
  1552. pxor xmm1, xmm0
  1553. pxor xmm1, xmm3
  1554. movdqa xmm2, xmm1
  1555. pxor xmm4, xmm0
  1556. pxor xmm6, xmm3
  1557. pslldq xmm2, 8
  1558. psrldq xmm1, 8
  1559. pxor xmm4, xmm2
  1560. pxor xmm6, xmm1
  1561. movdqu xmm7, [rsp+16]
  1562. pshufd xmm1, xmm14, 78
  1563. pshufd xmm2, xmm7, 78
  1564. movdqa xmm3, xmm7
  1565. movdqa xmm0, xmm7
  1566. pclmulqdq xmm3, xmm14, 17
  1567. pclmulqdq xmm0, xmm14, 0
  1568. pxor xmm1, xmm14
  1569. pxor xmm2, xmm7
  1570. pclmulqdq xmm1, xmm2, 0
  1571. pxor xmm1, xmm0
  1572. pxor xmm1, xmm3
  1573. movdqa xmm2, xmm1
  1574. pxor xmm4, xmm0
  1575. pxor xmm6, xmm3
  1576. pslldq xmm2, 8
  1577. psrldq xmm1, 8
  1578. pxor xmm4, xmm2
  1579. pxor xmm6, xmm1
  1580. movdqu xmm7, [rsp]
  1581. pshufd xmm1, xmm15, 78
  1582. pshufd xmm2, xmm7, 78
  1583. movdqa xmm3, xmm7
  1584. movdqa xmm0, xmm7
  1585. pclmulqdq xmm3, xmm15, 17
  1586. pclmulqdq xmm0, xmm15, 0
  1587. pxor xmm1, xmm15
  1588. pxor xmm2, xmm7
  1589. pclmulqdq xmm1, xmm2, 0
  1590. pxor xmm1, xmm0
  1591. pxor xmm1, xmm3
  1592. movdqa xmm2, xmm1
  1593. pxor xmm4, xmm0
  1594. pxor xmm6, xmm3
  1595. pslldq xmm2, 8
  1596. psrldq xmm1, 8
  1597. pxor xmm4, xmm2
  1598. pxor xmm6, xmm1
  1599. movdqa xmm0, xmm4
  1600. movdqa xmm1, xmm4
  1601. movdqa xmm2, xmm4
  1602. pslld xmm0, 31
  1603. pslld xmm1, 30
  1604. pslld xmm2, 25
  1605. pxor xmm0, xmm1
  1606. pxor xmm0, xmm2
  1607. movdqa xmm1, xmm0
  1608. psrldq xmm1, 4
  1609. pslldq xmm0, 12
  1610. pxor xmm4, xmm0
  1611. movdqa xmm2, xmm4
  1612. movdqa xmm3, xmm4
  1613. movdqa xmm0, xmm4
  1614. psrld xmm2, 1
  1615. psrld xmm3, 2
  1616. psrld xmm0, 7
  1617. pxor xmm2, xmm3
  1618. pxor xmm2, xmm0
  1619. pxor xmm2, xmm1
  1620. pxor xmm2, xmm4
  1621. pxor xmm6, xmm2
  1622. movdqu xmm5, [rsp]
  1623. L_AES_GCM_encrypt_done_128:
  1624. mov edx, r9d
  1625. cmp ebx, edx
  1626. jge L_AES_GCM_encrypt_done_enc
  1627. mov r13d, r9d
  1628. and r13d, 4294967280
  1629. cmp ebx, r13d
  1630. jge L_AES_GCM_encrypt_last_block_done
  1631. lea rcx, QWORD PTR [rdi+rbx]
  1632. lea rdx, QWORD PTR [rsi+rbx]
  1633. movdqu xmm8, [rsp+128]
  1634. movdqa xmm9, xmm8
  1635. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  1636. paddd xmm9, OWORD PTR L_aes_gcm_one
  1637. pxor xmm8, [r15]
  1638. movdqu [rsp+128], xmm9
  1639. aesenc xmm8, [r15+16]
  1640. aesenc xmm8, [r15+32]
  1641. aesenc xmm8, [r15+48]
  1642. aesenc xmm8, [r15+64]
  1643. aesenc xmm8, [r15+80]
  1644. aesenc xmm8, [r15+96]
  1645. aesenc xmm8, [r15+112]
  1646. aesenc xmm8, [r15+128]
  1647. aesenc xmm8, [r15+144]
  1648. cmp r10d, 11
  1649. movdqa xmm9, OWORD PTR [r15+160]
  1650. jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
  1651. aesenc xmm8, xmm9
  1652. aesenc xmm8, [r15+176]
  1653. cmp r10d, 13
  1654. movdqa xmm9, OWORD PTR [r15+192]
  1655. jl L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last
  1656. aesenc xmm8, xmm9
  1657. aesenc xmm8, [r15+208]
  1658. movdqa xmm9, OWORD PTR [r15+224]
  1659. L_AES_GCM_encrypt_aesenc_block_aesenc_avx_last:
  1660. aesenclast xmm8, xmm9
  1661. movdqu xmm9, [rcx]
  1662. pxor xmm8, xmm9
  1663. movdqu [rdx], xmm8
  1664. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  1665. pxor xmm6, xmm8
  1666. add ebx, 16
  1667. cmp ebx, r13d
  1668. jge L_AES_GCM_encrypt_last_block_ghash
  1669. L_AES_GCM_encrypt_last_block_start:
  1670. lea rcx, QWORD PTR [rdi+rbx]
  1671. lea rdx, QWORD PTR [rsi+rbx]
  1672. movdqu xmm8, [rsp+128]
  1673. movdqa xmm9, xmm8
  1674. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  1675. paddd xmm9, OWORD PTR L_aes_gcm_one
  1676. pxor xmm8, [r15]
  1677. movdqu [rsp+128], xmm9
  1678. movdqa xmm10, xmm6
  1679. pclmulqdq xmm10, xmm5, 16
  1680. aesenc xmm8, [r15+16]
  1681. aesenc xmm8, [r15+32]
  1682. movdqa xmm11, xmm6
  1683. pclmulqdq xmm11, xmm5, 1
  1684. aesenc xmm8, [r15+48]
  1685. aesenc xmm8, [r15+64]
  1686. movdqa xmm12, xmm6
  1687. pclmulqdq xmm12, xmm5, 0
  1688. aesenc xmm8, [r15+80]
  1689. movdqa xmm1, xmm6
  1690. pclmulqdq xmm1, xmm5, 17
  1691. aesenc xmm8, [r15+96]
  1692. pxor xmm10, xmm11
  1693. movdqa xmm2, xmm10
  1694. psrldq xmm10, 8
  1695. pslldq xmm2, 8
  1696. aesenc xmm8, [r15+112]
  1697. movdqa xmm3, xmm1
  1698. pxor xmm2, xmm12
  1699. pxor xmm3, xmm10
  1700. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  1701. movdqa xmm11, xmm2
  1702. pclmulqdq xmm11, xmm0, 16
  1703. aesenc xmm8, [r15+128]
  1704. pshufd xmm10, xmm2, 78
  1705. pxor xmm10, xmm11
  1706. movdqa xmm11, xmm10
  1707. pclmulqdq xmm11, xmm0, 16
  1708. aesenc xmm8, [r15+144]
  1709. pshufd xmm6, xmm10, 78
  1710. pxor xmm6, xmm11
  1711. pxor xmm6, xmm3
  1712. cmp r10d, 11
  1713. movdqa xmm9, OWORD PTR [r15+160]
  1714. jl L_AES_GCM_encrypt_aesenc_gfmul_last
  1715. aesenc xmm8, xmm9
  1716. aesenc xmm8, [r15+176]
  1717. cmp r10d, 13
  1718. movdqa xmm9, OWORD PTR [r15+192]
  1719. jl L_AES_GCM_encrypt_aesenc_gfmul_last
  1720. aesenc xmm8, xmm9
  1721. aesenc xmm8, [r15+208]
  1722. movdqa xmm9, OWORD PTR [r15+224]
  1723. L_AES_GCM_encrypt_aesenc_gfmul_last:
  1724. aesenclast xmm8, xmm9
  1725. movdqu xmm9, [rcx]
  1726. pxor xmm8, xmm9
  1727. movdqu [rdx], xmm8
  1728. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  1729. pxor xmm6, xmm8
  1730. add ebx, 16
  1731. cmp ebx, r13d
  1732. jl L_AES_GCM_encrypt_last_block_start
  1733. L_AES_GCM_encrypt_last_block_ghash:
  1734. pshufd xmm9, xmm5, 78
  1735. pshufd xmm10, xmm6, 78
  1736. movdqa xmm11, xmm6
  1737. movdqa xmm8, xmm6
  1738. pclmulqdq xmm11, xmm5, 17
  1739. pclmulqdq xmm8, xmm5, 0
  1740. pxor xmm9, xmm5
  1741. pxor xmm10, xmm6
  1742. pclmulqdq xmm9, xmm10, 0
  1743. pxor xmm9, xmm8
  1744. pxor xmm9, xmm11
  1745. movdqa xmm10, xmm9
  1746. movdqa xmm6, xmm11
  1747. pslldq xmm10, 8
  1748. psrldq xmm9, 8
  1749. pxor xmm8, xmm10
  1750. pxor xmm6, xmm9
  1751. movdqa xmm12, xmm8
  1752. movdqa xmm13, xmm8
  1753. movdqa xmm14, xmm8
  1754. pslld xmm12, 31
  1755. pslld xmm13, 30
  1756. pslld xmm14, 25
  1757. pxor xmm12, xmm13
  1758. pxor xmm12, xmm14
  1759. movdqa xmm13, xmm12
  1760. psrldq xmm13, 4
  1761. pslldq xmm12, 12
  1762. pxor xmm8, xmm12
  1763. movdqa xmm14, xmm8
  1764. movdqa xmm10, xmm8
  1765. movdqa xmm9, xmm8
  1766. psrld xmm14, 1
  1767. psrld xmm10, 2
  1768. psrld xmm9, 7
  1769. pxor xmm14, xmm10
  1770. pxor xmm14, xmm9
  1771. pxor xmm14, xmm13
  1772. pxor xmm14, xmm8
  1773. pxor xmm6, xmm14
  1774. L_AES_GCM_encrypt_last_block_done:
  1775. mov ecx, r9d
  1776. mov edx, ecx
  1777. and ecx, 15
  1778. jz L_AES_GCM_encrypt_aesenc_last15_enc_avx_done
  1779. movdqu xmm4, [rsp+128]
  1780. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  1781. pxor xmm4, [r15]
  1782. aesenc xmm4, [r15+16]
  1783. aesenc xmm4, [r15+32]
  1784. aesenc xmm4, [r15+48]
  1785. aesenc xmm4, [r15+64]
  1786. aesenc xmm4, [r15+80]
  1787. aesenc xmm4, [r15+96]
  1788. aesenc xmm4, [r15+112]
  1789. aesenc xmm4, [r15+128]
  1790. aesenc xmm4, [r15+144]
  1791. cmp r10d, 11
  1792. movdqa xmm9, OWORD PTR [r15+160]
  1793. jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
  1794. aesenc xmm4, xmm9
  1795. aesenc xmm4, [r15+176]
  1796. cmp r10d, 13
  1797. movdqa xmm9, OWORD PTR [r15+192]
  1798. jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last
  1799. aesenc xmm4, xmm9
  1800. aesenc xmm4, [r15+208]
  1801. movdqa xmm9, OWORD PTR [r15+224]
  1802. L_AES_GCM_encrypt_aesenc_last15_enc_avx_aesenc_avx_last:
  1803. aesenclast xmm4, xmm9
  1804. sub rsp, 16
  1805. xor ecx, ecx
  1806. movdqu [rsp], xmm4
  1807. L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop:
  1808. movzx r13d, BYTE PTR [rdi+rbx]
  1809. xor r13b, BYTE PTR [rsp+rcx]
  1810. mov BYTE PTR [rsi+rbx], r13b
  1811. mov BYTE PTR [rsp+rcx], r13b
  1812. inc ebx
  1813. inc ecx
  1814. cmp ebx, edx
  1815. jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_loop
  1816. xor r13, r13
  1817. cmp ecx, 16
  1818. je L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc
  1819. L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop:
  1820. mov BYTE PTR [rsp+rcx], r13b
  1821. inc ecx
  1822. cmp ecx, 16
  1823. jl L_AES_GCM_encrypt_aesenc_last15_enc_avx_byte_loop
  1824. L_AES_GCM_encrypt_aesenc_last15_enc_avx_finish_enc:
  1825. movdqu xmm4, [rsp]
  1826. add rsp, 16
  1827. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  1828. pxor xmm6, xmm4
  1829. pshufd xmm9, xmm5, 78
  1830. pshufd xmm10, xmm6, 78
  1831. movdqa xmm11, xmm6
  1832. movdqa xmm8, xmm6
  1833. pclmulqdq xmm11, xmm5, 17
  1834. pclmulqdq xmm8, xmm5, 0
  1835. pxor xmm9, xmm5
  1836. pxor xmm10, xmm6
  1837. pclmulqdq xmm9, xmm10, 0
  1838. pxor xmm9, xmm8
  1839. pxor xmm9, xmm11
  1840. movdqa xmm10, xmm9
  1841. movdqa xmm6, xmm11
  1842. pslldq xmm10, 8
  1843. psrldq xmm9, 8
  1844. pxor xmm8, xmm10
  1845. pxor xmm6, xmm9
  1846. movdqa xmm12, xmm8
  1847. movdqa xmm13, xmm8
  1848. movdqa xmm14, xmm8
  1849. pslld xmm12, 31
  1850. pslld xmm13, 30
  1851. pslld xmm14, 25
  1852. pxor xmm12, xmm13
  1853. pxor xmm12, xmm14
  1854. movdqa xmm13, xmm12
  1855. psrldq xmm13, 4
  1856. pslldq xmm12, 12
  1857. pxor xmm8, xmm12
  1858. movdqa xmm14, xmm8
  1859. movdqa xmm10, xmm8
  1860. movdqa xmm9, xmm8
  1861. psrld xmm14, 1
  1862. psrld xmm10, 2
  1863. psrld xmm9, 7
  1864. pxor xmm14, xmm10
  1865. pxor xmm14, xmm9
  1866. pxor xmm14, xmm13
  1867. pxor xmm14, xmm8
  1868. pxor xmm6, xmm14
  1869. L_AES_GCM_encrypt_aesenc_last15_enc_avx_done:
  1870. L_AES_GCM_encrypt_done_enc:
  1871. mov edx, r9d
  1872. mov ecx, r11d
  1873. shl rdx, 3
  1874. shl rcx, 3
  1875. pinsrq xmm0, rdx, 0
  1876. pinsrq xmm0, rcx, 1
  1877. pxor xmm6, xmm0
  1878. pshufd xmm9, xmm5, 78
  1879. pshufd xmm10, xmm6, 78
  1880. movdqa xmm11, xmm6
  1881. movdqa xmm8, xmm6
  1882. pclmulqdq xmm11, xmm5, 17
  1883. pclmulqdq xmm8, xmm5, 0
  1884. pxor xmm9, xmm5
  1885. pxor xmm10, xmm6
  1886. pclmulqdq xmm9, xmm10, 0
  1887. pxor xmm9, xmm8
  1888. pxor xmm9, xmm11
  1889. movdqa xmm10, xmm9
  1890. movdqa xmm6, xmm11
  1891. pslldq xmm10, 8
  1892. psrldq xmm9, 8
  1893. pxor xmm8, xmm10
  1894. pxor xmm6, xmm9
  1895. movdqa xmm12, xmm8
  1896. movdqa xmm13, xmm8
  1897. movdqa xmm14, xmm8
  1898. pslld xmm12, 31
  1899. pslld xmm13, 30
  1900. pslld xmm14, 25
  1901. pxor xmm12, xmm13
  1902. pxor xmm12, xmm14
  1903. movdqa xmm13, xmm12
  1904. psrldq xmm13, 4
  1905. pslldq xmm12, 12
  1906. pxor xmm8, xmm12
  1907. movdqa xmm14, xmm8
  1908. movdqa xmm10, xmm8
  1909. movdqa xmm9, xmm8
  1910. psrld xmm14, 1
  1911. psrld xmm10, 2
  1912. psrld xmm9, 7
  1913. pxor xmm14, xmm10
  1914. pxor xmm14, xmm9
  1915. pxor xmm14, xmm13
  1916. pxor xmm14, xmm8
  1917. pxor xmm6, xmm14
  1918. pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
  1919. movdqu xmm0, [rsp+144]
  1920. pxor xmm0, xmm6
  1921. cmp r14d, 16
  1922. je L_AES_GCM_encrypt_store_tag_16
  1923. xor rcx, rcx
  1924. movdqu [rsp], xmm0
  1925. L_AES_GCM_encrypt_store_tag_loop:
  1926. movzx r13d, BYTE PTR [rsp+rcx]
  1927. mov BYTE PTR [r8+rcx], r13b
  1928. inc ecx
  1929. cmp ecx, r14d
  1930. jne L_AES_GCM_encrypt_store_tag_loop
  1931. jmp L_AES_GCM_encrypt_store_tag_done
  1932. L_AES_GCM_encrypt_store_tag_16:
  1933. movdqu [r8], xmm0
  1934. L_AES_GCM_encrypt_store_tag_done:
  1935. add rsp, 160
  1936. pop r15
  1937. pop r14
  1938. pop rbx
  1939. pop r12
  1940. pop rsi
  1941. pop rdi
  1942. pop r13
  1943. ret
  1944. AES_GCM_encrypt ENDP
  1945. _text ENDS
  1946. _text SEGMENT READONLY PARA
  1947. AES_GCM_decrypt PROC
  1948. push r13
  1949. push rdi
  1950. push rsi
  1951. push r12
  1952. push rbx
  1953. push r14
  1954. push r15
  1955. push rbp
  1956. mov rdi, rcx
  1957. mov rsi, rdx
  1958. mov r12, r8
  1959. mov rax, r9
  1960. mov r8, QWORD PTR [rsp+104]
  1961. mov r9d, DWORD PTR [rsp+112]
  1962. mov r11d, DWORD PTR [rsp+120]
  1963. mov ebx, DWORD PTR [rsp+128]
  1964. mov r14d, DWORD PTR [rsp+136]
  1965. mov r15, QWORD PTR [rsp+144]
  1966. mov r10d, DWORD PTR [rsp+152]
  1967. mov rbp, QWORD PTR [rsp+160]
  1968. sub rsp, 168
  1969. pxor xmm4, xmm4
  1970. pxor xmm6, xmm6
  1971. cmp ebx, 12
  1972. mov edx, ebx
  1973. jne L_AES_GCM_decrypt_iv_not_12
  1974. ; Calculate values when IV is 12 bytes
  1975. ; Set counter based on IV
  1976. mov ecx, 16777216
  1977. pinsrq xmm4, QWORD PTR [rax], 0
  1978. pinsrd xmm4, DWORD PTR [rax+8], 2
  1979. pinsrd xmm4, ecx, 3
  1980. ; H = Encrypt X(=0) and T = Encrypt counter
  1981. movdqa xmm1, xmm4
  1982. movdqa xmm5, OWORD PTR [r15]
  1983. pxor xmm1, xmm5
  1984. movdqa xmm7, OWORD PTR [r15+16]
  1985. aesenc xmm5, xmm7
  1986. aesenc xmm1, xmm7
  1987. movdqa xmm7, OWORD PTR [r15+32]
  1988. aesenc xmm5, xmm7
  1989. aesenc xmm1, xmm7
  1990. movdqa xmm7, OWORD PTR [r15+48]
  1991. aesenc xmm5, xmm7
  1992. aesenc xmm1, xmm7
  1993. movdqa xmm7, OWORD PTR [r15+64]
  1994. aesenc xmm5, xmm7
  1995. aesenc xmm1, xmm7
  1996. movdqa xmm7, OWORD PTR [r15+80]
  1997. aesenc xmm5, xmm7
  1998. aesenc xmm1, xmm7
  1999. movdqa xmm7, OWORD PTR [r15+96]
  2000. aesenc xmm5, xmm7
  2001. aesenc xmm1, xmm7
  2002. movdqa xmm7, OWORD PTR [r15+112]
  2003. aesenc xmm5, xmm7
  2004. aesenc xmm1, xmm7
  2005. movdqa xmm7, OWORD PTR [r15+128]
  2006. aesenc xmm5, xmm7
  2007. aesenc xmm1, xmm7
  2008. movdqa xmm7, OWORD PTR [r15+144]
  2009. aesenc xmm5, xmm7
  2010. aesenc xmm1, xmm7
  2011. cmp r10d, 11
  2012. movdqa xmm7, OWORD PTR [r15+160]
  2013. jl L_AES_GCM_decrypt_calc_iv_12_last
  2014. aesenc xmm5, xmm7
  2015. aesenc xmm1, xmm7
  2016. movdqa xmm7, OWORD PTR [r15+176]
  2017. aesenc xmm5, xmm7
  2018. aesenc xmm1, xmm7
  2019. cmp r10d, 13
  2020. movdqa xmm7, OWORD PTR [r15+192]
  2021. jl L_AES_GCM_decrypt_calc_iv_12_last
  2022. aesenc xmm5, xmm7
  2023. aesenc xmm1, xmm7
  2024. movdqa xmm7, OWORD PTR [r15+208]
  2025. aesenc xmm5, xmm7
  2026. aesenc xmm1, xmm7
  2027. movdqa xmm7, OWORD PTR [r15+224]
  2028. L_AES_GCM_decrypt_calc_iv_12_last:
  2029. aesenclast xmm5, xmm7
  2030. aesenclast xmm1, xmm7
  2031. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  2032. movdqu [rsp+144], xmm1
  2033. jmp L_AES_GCM_decrypt_iv_done
  2034. L_AES_GCM_decrypt_iv_not_12:
  2035. ; Calculate values when IV is not 12 bytes
  2036. ; H = Encrypt X(=0)
  2037. movdqa xmm5, OWORD PTR [r15]
  2038. aesenc xmm5, [r15+16]
  2039. aesenc xmm5, [r15+32]
  2040. aesenc xmm5, [r15+48]
  2041. aesenc xmm5, [r15+64]
  2042. aesenc xmm5, [r15+80]
  2043. aesenc xmm5, [r15+96]
  2044. aesenc xmm5, [r15+112]
  2045. aesenc xmm5, [r15+128]
  2046. aesenc xmm5, [r15+144]
  2047. cmp r10d, 11
  2048. movdqa xmm9, OWORD PTR [r15+160]
  2049. jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
  2050. aesenc xmm5, xmm9
  2051. aesenc xmm5, [r15+176]
  2052. cmp r10d, 13
  2053. movdqa xmm9, OWORD PTR [r15+192]
  2054. jl L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last
  2055. aesenc xmm5, xmm9
  2056. aesenc xmm5, [r15+208]
  2057. movdqa xmm9, OWORD PTR [r15+224]
  2058. L_AES_GCM_decrypt_calc_iv_1_aesenc_avx_last:
  2059. aesenclast xmm5, xmm9
  2060. pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
  2061. ; Calc counter
  2062. ; Initialization vector
  2063. cmp edx, 0
  2064. mov rcx, 0
  2065. je L_AES_GCM_decrypt_calc_iv_done
  2066. cmp edx, 16
  2067. jl L_AES_GCM_decrypt_calc_iv_lt16
  2068. and edx, 4294967280
  2069. L_AES_GCM_decrypt_calc_iv_16_loop:
  2070. movdqu xmm8, [rax+rcx]
  2071. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2072. pxor xmm4, xmm8
  2073. pshufd xmm1, xmm4, 78
  2074. pshufd xmm2, xmm5, 78
  2075. movdqa xmm3, xmm5
  2076. movdqa xmm0, xmm5
  2077. pclmulqdq xmm3, xmm4, 17
  2078. pclmulqdq xmm0, xmm4, 0
  2079. pxor xmm1, xmm4
  2080. pxor xmm2, xmm5
  2081. pclmulqdq xmm1, xmm2, 0
  2082. pxor xmm1, xmm0
  2083. pxor xmm1, xmm3
  2084. movdqa xmm2, xmm1
  2085. movdqa xmm7, xmm0
  2086. movdqa xmm4, xmm3
  2087. pslldq xmm2, 8
  2088. psrldq xmm1, 8
  2089. pxor xmm7, xmm2
  2090. pxor xmm4, xmm1
  2091. movdqa xmm0, xmm7
  2092. movdqa xmm1, xmm4
  2093. psrld xmm0, 31
  2094. psrld xmm1, 31
  2095. pslld xmm7, 1
  2096. pslld xmm4, 1
  2097. movdqa xmm2, xmm0
  2098. pslldq xmm0, 4
  2099. psrldq xmm2, 12
  2100. pslldq xmm1, 4
  2101. por xmm4, xmm2
  2102. por xmm7, xmm0
  2103. por xmm4, xmm1
  2104. movdqa xmm0, xmm7
  2105. movdqa xmm1, xmm7
  2106. movdqa xmm2, xmm7
  2107. pslld xmm0, 31
  2108. pslld xmm1, 30
  2109. pslld xmm2, 25
  2110. pxor xmm0, xmm1
  2111. pxor xmm0, xmm2
  2112. movdqa xmm1, xmm0
  2113. psrldq xmm1, 4
  2114. pslldq xmm0, 12
  2115. pxor xmm7, xmm0
  2116. movdqa xmm2, xmm7
  2117. movdqa xmm3, xmm7
  2118. movdqa xmm0, xmm7
  2119. psrld xmm2, 1
  2120. psrld xmm3, 2
  2121. psrld xmm0, 7
  2122. pxor xmm2, xmm3
  2123. pxor xmm2, xmm0
  2124. pxor xmm2, xmm1
  2125. pxor xmm2, xmm7
  2126. pxor xmm4, xmm2
  2127. add ecx, 16
  2128. cmp ecx, edx
  2129. jl L_AES_GCM_decrypt_calc_iv_16_loop
  2130. mov edx, ebx
  2131. cmp ecx, edx
  2132. je L_AES_GCM_decrypt_calc_iv_done
  2133. L_AES_GCM_decrypt_calc_iv_lt16:
  2134. sub rsp, 16
  2135. pxor xmm8, xmm8
  2136. xor ebx, ebx
  2137. movdqu [rsp], xmm8
  2138. L_AES_GCM_decrypt_calc_iv_loop:
  2139. movzx r13d, BYTE PTR [rax+rcx]
  2140. mov BYTE PTR [rsp+rbx], r13b
  2141. inc ecx
  2142. inc ebx
  2143. cmp ecx, edx
  2144. jl L_AES_GCM_decrypt_calc_iv_loop
  2145. movdqu xmm8, [rsp]
  2146. add rsp, 16
  2147. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2148. pxor xmm4, xmm8
  2149. pshufd xmm1, xmm4, 78
  2150. pshufd xmm2, xmm5, 78
  2151. movdqa xmm3, xmm5
  2152. movdqa xmm0, xmm5
  2153. pclmulqdq xmm3, xmm4, 17
  2154. pclmulqdq xmm0, xmm4, 0
  2155. pxor xmm1, xmm4
  2156. pxor xmm2, xmm5
  2157. pclmulqdq xmm1, xmm2, 0
  2158. pxor xmm1, xmm0
  2159. pxor xmm1, xmm3
  2160. movdqa xmm2, xmm1
  2161. movdqa xmm7, xmm0
  2162. movdqa xmm4, xmm3
  2163. pslldq xmm2, 8
  2164. psrldq xmm1, 8
  2165. pxor xmm7, xmm2
  2166. pxor xmm4, xmm1
  2167. movdqa xmm0, xmm7
  2168. movdqa xmm1, xmm4
  2169. psrld xmm0, 31
  2170. psrld xmm1, 31
  2171. pslld xmm7, 1
  2172. pslld xmm4, 1
  2173. movdqa xmm2, xmm0
  2174. pslldq xmm0, 4
  2175. psrldq xmm2, 12
  2176. pslldq xmm1, 4
  2177. por xmm4, xmm2
  2178. por xmm7, xmm0
  2179. por xmm4, xmm1
  2180. movdqa xmm0, xmm7
  2181. movdqa xmm1, xmm7
  2182. movdqa xmm2, xmm7
  2183. pslld xmm0, 31
  2184. pslld xmm1, 30
  2185. pslld xmm2, 25
  2186. pxor xmm0, xmm1
  2187. pxor xmm0, xmm2
  2188. movdqa xmm1, xmm0
  2189. psrldq xmm1, 4
  2190. pslldq xmm0, 12
  2191. pxor xmm7, xmm0
  2192. movdqa xmm2, xmm7
  2193. movdqa xmm3, xmm7
  2194. movdqa xmm0, xmm7
  2195. psrld xmm2, 1
  2196. psrld xmm3, 2
  2197. psrld xmm0, 7
  2198. pxor xmm2, xmm3
  2199. pxor xmm2, xmm0
  2200. pxor xmm2, xmm1
  2201. pxor xmm2, xmm7
  2202. pxor xmm4, xmm2
  2203. L_AES_GCM_decrypt_calc_iv_done:
  2204. ; Hash the IV length (in bits) to complete the counter, then T = Encrypt counter
  2205. pxor xmm0, xmm0
  2206. shl edx, 3
  2207. pinsrq xmm0, rdx, 0
  2208. pxor xmm4, xmm0
  2209. pshufd xmm1, xmm4, 78
  2210. pshufd xmm2, xmm5, 78
  2211. movdqa xmm3, xmm5
  2212. movdqa xmm0, xmm5
  2213. pclmulqdq xmm3, xmm4, 17
  2214. pclmulqdq xmm0, xmm4, 0
  2215. pxor xmm1, xmm4
  2216. pxor xmm2, xmm5
  2217. pclmulqdq xmm1, xmm2, 0
  2218. pxor xmm1, xmm0
  2219. pxor xmm1, xmm3
  2220. movdqa xmm2, xmm1
  2221. movdqa xmm7, xmm0
  2222. movdqa xmm4, xmm3
  2223. pslldq xmm2, 8
  2224. psrldq xmm1, 8
  2225. pxor xmm7, xmm2
  2226. pxor xmm4, xmm1
  2227. movdqa xmm0, xmm7
  2228. movdqa xmm1, xmm4
  2229. psrld xmm0, 31
  2230. psrld xmm1, 31
  2231. pslld xmm7, 1
  2232. pslld xmm4, 1
  2233. movdqa xmm2, xmm0
  2234. pslldq xmm0, 4
  2235. psrldq xmm2, 12
  2236. pslldq xmm1, 4
  2237. por xmm4, xmm2
  2238. por xmm7, xmm0
  2239. por xmm4, xmm1
  2240. movdqa xmm0, xmm7
  2241. movdqa xmm1, xmm7
  2242. movdqa xmm2, xmm7
  2243. pslld xmm0, 31
  2244. pslld xmm1, 30
  2245. pslld xmm2, 25
  2246. pxor xmm0, xmm1
  2247. pxor xmm0, xmm2
  2248. movdqa xmm1, xmm0
  2249. psrldq xmm1, 4
  2250. pslldq xmm0, 12
  2251. pxor xmm7, xmm0
  2252. movdqa xmm2, xmm7
  2253. movdqa xmm3, xmm7
  2254. movdqa xmm0, xmm7
  2255. psrld xmm2, 1
  2256. psrld xmm3, 2
  2257. psrld xmm0, 7
  2258. pxor xmm2, xmm3
  2259. pxor xmm2, xmm0
  2260. pxor xmm2, xmm1
  2261. pxor xmm2, xmm7
  2262. pxor xmm4, xmm2
  2263. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  2264. ; Encrypt counter
  2265. movdqa xmm8, OWORD PTR [r15]
  2266. pxor xmm8, xmm4
  2267. aesenc xmm8, [r15+16]
  2268. aesenc xmm8, [r15+32]
  2269. aesenc xmm8, [r15+48]
  2270. aesenc xmm8, [r15+64]
  2271. aesenc xmm8, [r15+80]
  2272. aesenc xmm8, [r15+96]
  2273. aesenc xmm8, [r15+112]
  2274. aesenc xmm8, [r15+128]
  2275. aesenc xmm8, [r15+144]
  2276. cmp r10d, 11
  2277. movdqa xmm9, OWORD PTR [r15+160]
  2278. jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
  2279. aesenc xmm8, xmm9
  2280. aesenc xmm8, [r15+176]
  2281. cmp r10d, 13
  2282. movdqa xmm9, OWORD PTR [r15+192]
  2283. jl L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last
  2284. aesenc xmm8, xmm9
  2285. aesenc xmm8, [r15+208]
  2286. movdqa xmm9, OWORD PTR [r15+224]
  2287. L_AES_GCM_decrypt_calc_iv_2_aesenc_avx_last:
  2288. aesenclast xmm8, xmm9
  2289. movdqu [rsp+144], xmm8
  2290. L_AES_GCM_decrypt_iv_done:
  2291. ; Additional authentication data
  2292. mov edx, r11d
  2293. cmp edx, 0
  2294. je L_AES_GCM_decrypt_calc_aad_done
  2295. xor ecx, ecx
  2296. cmp edx, 16
  2297. jl L_AES_GCM_decrypt_calc_aad_lt16
  2298. and edx, 4294967280
  2299. L_AES_GCM_decrypt_calc_aad_16_loop:
  2300. movdqu xmm8, [r12+rcx]
  2301. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2302. pxor xmm6, xmm8
  2303. pshufd xmm1, xmm6, 78
  2304. pshufd xmm2, xmm5, 78
  2305. movdqa xmm3, xmm5
  2306. movdqa xmm0, xmm5
  2307. pclmulqdq xmm3, xmm6, 17
  2308. pclmulqdq xmm0, xmm6, 0
  2309. pxor xmm1, xmm6
  2310. pxor xmm2, xmm5
  2311. pclmulqdq xmm1, xmm2, 0
  2312. pxor xmm1, xmm0
  2313. pxor xmm1, xmm3
  2314. movdqa xmm2, xmm1
  2315. movdqa xmm7, xmm0
  2316. movdqa xmm6, xmm3
  2317. pslldq xmm2, 8
  2318. psrldq xmm1, 8
  2319. pxor xmm7, xmm2
  2320. pxor xmm6, xmm1
  2321. movdqa xmm0, xmm7
  2322. movdqa xmm1, xmm6
  2323. psrld xmm0, 31
  2324. psrld xmm1, 31
  2325. pslld xmm7, 1
  2326. pslld xmm6, 1
  2327. movdqa xmm2, xmm0
  2328. pslldq xmm0, 4
  2329. psrldq xmm2, 12
  2330. pslldq xmm1, 4
  2331. por xmm6, xmm2
  2332. por xmm7, xmm0
  2333. por xmm6, xmm1
  2334. movdqa xmm0, xmm7
  2335. movdqa xmm1, xmm7
  2336. movdqa xmm2, xmm7
  2337. pslld xmm0, 31
  2338. pslld xmm1, 30
  2339. pslld xmm2, 25
  2340. pxor xmm0, xmm1
  2341. pxor xmm0, xmm2
  2342. movdqa xmm1, xmm0
  2343. psrldq xmm1, 4
  2344. pslldq xmm0, 12
  2345. pxor xmm7, xmm0
  2346. movdqa xmm2, xmm7
  2347. movdqa xmm3, xmm7
  2348. movdqa xmm0, xmm7
  2349. psrld xmm2, 1
  2350. psrld xmm3, 2
  2351. psrld xmm0, 7
  2352. pxor xmm2, xmm3
  2353. pxor xmm2, xmm0
  2354. pxor xmm2, xmm1
  2355. pxor xmm2, xmm7
  2356. pxor xmm6, xmm2
  2357. add ecx, 16
  2358. cmp ecx, edx
  2359. jl L_AES_GCM_decrypt_calc_aad_16_loop
  2360. mov edx, r11d
  2361. cmp ecx, edx
  2362. je L_AES_GCM_decrypt_calc_aad_done
  2363. L_AES_GCM_decrypt_calc_aad_lt16:
  2364. sub rsp, 16
  2365. pxor xmm8, xmm8
  2366. xor ebx, ebx
  2367. movdqu [rsp], xmm8
  2368. L_AES_GCM_decrypt_calc_aad_loop:
  2369. movzx r13d, BYTE PTR [r12+rcx]
  2370. mov BYTE PTR [rsp+rbx], r13b
  2371. inc ecx
  2372. inc ebx
  2373. cmp ecx, edx
  2374. jl L_AES_GCM_decrypt_calc_aad_loop
  2375. movdqu xmm8, [rsp]
  2376. add rsp, 16
  2377. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  2378. pxor xmm6, xmm8
  2379. pshufd xmm1, xmm6, 78
  2380. pshufd xmm2, xmm5, 78
  2381. movdqa xmm3, xmm5
  2382. movdqa xmm0, xmm5
  2383. pclmulqdq xmm3, xmm6, 17
  2384. pclmulqdq xmm0, xmm6, 0
  2385. pxor xmm1, xmm6
  2386. pxor xmm2, xmm5
  2387. pclmulqdq xmm1, xmm2, 0
  2388. pxor xmm1, xmm0
  2389. pxor xmm1, xmm3
  2390. movdqa xmm2, xmm1
  2391. movdqa xmm7, xmm0
  2392. movdqa xmm6, xmm3
  2393. pslldq xmm2, 8
  2394. psrldq xmm1, 8
  2395. pxor xmm7, xmm2
  2396. pxor xmm6, xmm1
  2397. movdqa xmm0, xmm7
  2398. movdqa xmm1, xmm6
  2399. psrld xmm0, 31
  2400. psrld xmm1, 31
  2401. pslld xmm7, 1
  2402. pslld xmm6, 1
  2403. movdqa xmm2, xmm0
  2404. pslldq xmm0, 4
  2405. psrldq xmm2, 12
  2406. pslldq xmm1, 4
  2407. por xmm6, xmm2
  2408. por xmm7, xmm0
  2409. por xmm6, xmm1
  2410. movdqa xmm0, xmm7
  2411. movdqa xmm1, xmm7
  2412. movdqa xmm2, xmm7
  2413. pslld xmm0, 31
  2414. pslld xmm1, 30
  2415. pslld xmm2, 25
  2416. pxor xmm0, xmm1
  2417. pxor xmm0, xmm2
  2418. movdqa xmm1, xmm0
  2419. psrldq xmm1, 4
  2420. pslldq xmm0, 12
  2421. pxor xmm7, xmm0
  2422. movdqa xmm2, xmm7
  2423. movdqa xmm3, xmm7
  2424. movdqa xmm0, xmm7
  2425. psrld xmm2, 1
  2426. psrld xmm3, 2
  2427. psrld xmm0, 7
  2428. pxor xmm2, xmm3
  2429. pxor xmm2, xmm0
  2430. pxor xmm2, xmm1
  2431. pxor xmm2, xmm7
  2432. pxor xmm6, xmm2
  2433. L_AES_GCM_decrypt_calc_aad_done:
  2434. ; Calculate counter and H
  2435. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  2436. movdqa xmm9, xmm5
  2437. paddd xmm4, OWORD PTR L_aes_gcm_one
  2438. movdqa xmm8, xmm5
  2439. movdqu [rsp+128], xmm4
  2440. psrlq xmm9, 63
  2441. psllq xmm8, 1
  2442. pslldq xmm9, 8
  2443. por xmm8, xmm9
  2444. pshufd xmm5, xmm5, 255
  2445. psrad xmm5, 31
  2446. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  2447. pxor xmm5, xmm8
  2448. xor ebx, ebx
  2449. cmp r9d, 128
  2450. mov r13d, r9d
  2451. jl L_AES_GCM_decrypt_done_128
  2452. and r13d, 4294967168
  2453. movdqa xmm2, xmm6
  2454. ; H ^ 1
  2455. movdqu [rsp], xmm5
  2456. ; H ^ 2
  2457. pshufd xmm9, xmm5, 78
  2458. pshufd xmm10, xmm5, 78
  2459. movdqa xmm11, xmm5
  2460. movdqa xmm8, xmm5
  2461. pclmulqdq xmm11, xmm5, 17
  2462. pclmulqdq xmm8, xmm5, 0
  2463. pxor xmm9, xmm5
  2464. pxor xmm10, xmm5
  2465. pclmulqdq xmm9, xmm10, 0
  2466. pxor xmm9, xmm8
  2467. pxor xmm9, xmm11
  2468. movdqa xmm10, xmm9
  2469. movdqa xmm0, xmm11
  2470. pslldq xmm10, 8
  2471. psrldq xmm9, 8
  2472. pxor xmm8, xmm10
  2473. pxor xmm0, xmm9
  2474. movdqa xmm12, xmm8
  2475. movdqa xmm13, xmm8
  2476. movdqa xmm14, xmm8
  2477. pslld xmm12, 31
  2478. pslld xmm13, 30
  2479. pslld xmm14, 25
  2480. pxor xmm12, xmm13
  2481. pxor xmm12, xmm14
  2482. movdqa xmm13, xmm12
  2483. psrldq xmm13, 4
  2484. pslldq xmm12, 12
  2485. pxor xmm8, xmm12
  2486. movdqa xmm14, xmm8
  2487. movdqa xmm10, xmm8
  2488. movdqa xmm9, xmm8
  2489. psrld xmm14, 1
  2490. psrld xmm10, 2
  2491. psrld xmm9, 7
  2492. pxor xmm14, xmm10
  2493. pxor xmm14, xmm9
  2494. pxor xmm14, xmm13
  2495. pxor xmm14, xmm8
  2496. pxor xmm0, xmm14
  2497. movdqu [rsp+16], xmm0
  2498. ; H ^ 3
  2499. pshufd xmm9, xmm5, 78
  2500. pshufd xmm10, xmm0, 78
  2501. movdqa xmm11, xmm0
  2502. movdqa xmm8, xmm0
  2503. pclmulqdq xmm11, xmm5, 17
  2504. pclmulqdq xmm8, xmm5, 0
  2505. pxor xmm9, xmm5
  2506. pxor xmm10, xmm0
  2507. pclmulqdq xmm9, xmm10, 0
  2508. pxor xmm9, xmm8
  2509. pxor xmm9, xmm11
  2510. movdqa xmm10, xmm9
  2511. movdqa xmm1, xmm11
  2512. pslldq xmm10, 8
  2513. psrldq xmm9, 8
  2514. pxor xmm8, xmm10
  2515. pxor xmm1, xmm9
  2516. movdqa xmm12, xmm8
  2517. movdqa xmm13, xmm8
  2518. movdqa xmm14, xmm8
  2519. pslld xmm12, 31
  2520. pslld xmm13, 30
  2521. pslld xmm14, 25
  2522. pxor xmm12, xmm13
  2523. pxor xmm12, xmm14
  2524. movdqa xmm13, xmm12
  2525. psrldq xmm13, 4
  2526. pslldq xmm12, 12
  2527. pxor xmm8, xmm12
  2528. movdqa xmm14, xmm8
  2529. movdqa xmm10, xmm8
  2530. movdqa xmm9, xmm8
  2531. psrld xmm14, 1
  2532. psrld xmm10, 2
  2533. psrld xmm9, 7
  2534. pxor xmm14, xmm10
  2535. pxor xmm14, xmm9
  2536. pxor xmm14, xmm13
  2537. pxor xmm14, xmm8
  2538. pxor xmm1, xmm14
  2539. movdqu [rsp+32], xmm1
  2540. ; H ^ 4
  2541. pshufd xmm9, xmm0, 78
  2542. pshufd xmm10, xmm0, 78
  2543. movdqa xmm11, xmm0
  2544. movdqa xmm8, xmm0
  2545. pclmulqdq xmm11, xmm0, 17
  2546. pclmulqdq xmm8, xmm0, 0
  2547. pxor xmm9, xmm0
  2548. pxor xmm10, xmm0
  2549. pclmulqdq xmm9, xmm10, 0
  2550. pxor xmm9, xmm8
  2551. pxor xmm9, xmm11
  2552. movdqa xmm10, xmm9
  2553. movdqa xmm3, xmm11
  2554. pslldq xmm10, 8
  2555. psrldq xmm9, 8
  2556. pxor xmm8, xmm10
  2557. pxor xmm3, xmm9
  2558. movdqa xmm12, xmm8
  2559. movdqa xmm13, xmm8
  2560. movdqa xmm14, xmm8
  2561. pslld xmm12, 31
  2562. pslld xmm13, 30
  2563. pslld xmm14, 25
  2564. pxor xmm12, xmm13
  2565. pxor xmm12, xmm14
  2566. movdqa xmm13, xmm12
  2567. psrldq xmm13, 4
  2568. pslldq xmm12, 12
  2569. pxor xmm8, xmm12
  2570. movdqa xmm14, xmm8
  2571. movdqa xmm10, xmm8
  2572. movdqa xmm9, xmm8
  2573. psrld xmm14, 1
  2574. psrld xmm10, 2
  2575. psrld xmm9, 7
  2576. pxor xmm14, xmm10
  2577. pxor xmm14, xmm9
  2578. pxor xmm14, xmm13
  2579. pxor xmm14, xmm8
  2580. pxor xmm3, xmm14
  2581. movdqu [rsp+48], xmm3
  2582. ; H ^ 5
  2583. pshufd xmm9, xmm0, 78
  2584. pshufd xmm10, xmm1, 78
  2585. movdqa xmm11, xmm1
  2586. movdqa xmm8, xmm1
  2587. pclmulqdq xmm11, xmm0, 17
  2588. pclmulqdq xmm8, xmm0, 0
  2589. pxor xmm9, xmm0
  2590. pxor xmm10, xmm1
  2591. pclmulqdq xmm9, xmm10, 0
  2592. pxor xmm9, xmm8
  2593. pxor xmm9, xmm11
  2594. movdqa xmm10, xmm9
  2595. movdqa xmm7, xmm11
  2596. pslldq xmm10, 8
  2597. psrldq xmm9, 8
  2598. pxor xmm8, xmm10
  2599. pxor xmm7, xmm9
  2600. movdqa xmm12, xmm8
  2601. movdqa xmm13, xmm8
  2602. movdqa xmm14, xmm8
  2603. pslld xmm12, 31
  2604. pslld xmm13, 30
  2605. pslld xmm14, 25
  2606. pxor xmm12, xmm13
  2607. pxor xmm12, xmm14
  2608. movdqa xmm13, xmm12
  2609. psrldq xmm13, 4
  2610. pslldq xmm12, 12
  2611. pxor xmm8, xmm12
  2612. movdqa xmm14, xmm8
  2613. movdqa xmm10, xmm8
  2614. movdqa xmm9, xmm8
  2615. psrld xmm14, 1
  2616. psrld xmm10, 2
  2617. psrld xmm9, 7
  2618. pxor xmm14, xmm10
  2619. pxor xmm14, xmm9
  2620. pxor xmm14, xmm13
  2621. pxor xmm14, xmm8
  2622. pxor xmm7, xmm14
  2623. movdqu [rsp+64], xmm7
  2624. ; H ^ 6
  2625. pshufd xmm9, xmm1, 78
  2626. pshufd xmm10, xmm1, 78
  2627. movdqa xmm11, xmm1
  2628. movdqa xmm8, xmm1
  2629. pclmulqdq xmm11, xmm1, 17
  2630. pclmulqdq xmm8, xmm1, 0
  2631. pxor xmm9, xmm1
  2632. pxor xmm10, xmm1
  2633. pclmulqdq xmm9, xmm10, 0
  2634. pxor xmm9, xmm8
  2635. pxor xmm9, xmm11
  2636. movdqa xmm10, xmm9
  2637. movdqa xmm7, xmm11
  2638. pslldq xmm10, 8
  2639. psrldq xmm9, 8
  2640. pxor xmm8, xmm10
  2641. pxor xmm7, xmm9
  2642. movdqa xmm12, xmm8
  2643. movdqa xmm13, xmm8
  2644. movdqa xmm14, xmm8
  2645. pslld xmm12, 31
  2646. pslld xmm13, 30
  2647. pslld xmm14, 25
  2648. pxor xmm12, xmm13
  2649. pxor xmm12, xmm14
  2650. movdqa xmm13, xmm12
  2651. psrldq xmm13, 4
  2652. pslldq xmm12, 12
  2653. pxor xmm8, xmm12
  2654. movdqa xmm14, xmm8
  2655. movdqa xmm10, xmm8
  2656. movdqa xmm9, xmm8
  2657. psrld xmm14, 1
  2658. psrld xmm10, 2
  2659. psrld xmm9, 7
  2660. pxor xmm14, xmm10
  2661. pxor xmm14, xmm9
  2662. pxor xmm14, xmm13
  2663. pxor xmm14, xmm8
  2664. pxor xmm7, xmm14
  2665. movdqu [rsp+80], xmm7
  2666. ; H ^ 7
  2667. pshufd xmm9, xmm1, 78
  2668. pshufd xmm10, xmm3, 78
  2669. movdqa xmm11, xmm3
  2670. movdqa xmm8, xmm3
  2671. pclmulqdq xmm11, xmm1, 17
  2672. pclmulqdq xmm8, xmm1, 0
  2673. pxor xmm9, xmm1
  2674. pxor xmm10, xmm3
  2675. pclmulqdq xmm9, xmm10, 0
  2676. pxor xmm9, xmm8
  2677. pxor xmm9, xmm11
  2678. movdqa xmm10, xmm9
  2679. movdqa xmm7, xmm11
  2680. pslldq xmm10, 8
  2681. psrldq xmm9, 8
  2682. pxor xmm8, xmm10
  2683. pxor xmm7, xmm9
  2684. movdqa xmm12, xmm8
  2685. movdqa xmm13, xmm8
  2686. movdqa xmm14, xmm8
  2687. pslld xmm12, 31
  2688. pslld xmm13, 30
  2689. pslld xmm14, 25
  2690. pxor xmm12, xmm13
  2691. pxor xmm12, xmm14
  2692. movdqa xmm13, xmm12
  2693. psrldq xmm13, 4
  2694. pslldq xmm12, 12
  2695. pxor xmm8, xmm12
  2696. movdqa xmm14, xmm8
  2697. movdqa xmm10, xmm8
  2698. movdqa xmm9, xmm8
  2699. psrld xmm14, 1
  2700. psrld xmm10, 2
  2701. psrld xmm9, 7
  2702. pxor xmm14, xmm10
  2703. pxor xmm14, xmm9
  2704. pxor xmm14, xmm13
  2705. pxor xmm14, xmm8
  2706. pxor xmm7, xmm14
  2707. movdqu [rsp+96], xmm7
  2708. ; H ^ 8
  2709. pshufd xmm9, xmm3, 78
  2710. pshufd xmm10, xmm3, 78
  2711. movdqa xmm11, xmm3
  2712. movdqa xmm8, xmm3
  2713. pclmulqdq xmm11, xmm3, 17
  2714. pclmulqdq xmm8, xmm3, 0
  2715. pxor xmm9, xmm3
  2716. pxor xmm10, xmm3
  2717. pclmulqdq xmm9, xmm10, 0
  2718. pxor xmm9, xmm8
  2719. pxor xmm9, xmm11
  2720. movdqa xmm10, xmm9
  2721. movdqa xmm7, xmm11
  2722. pslldq xmm10, 8
  2723. psrldq xmm9, 8
  2724. pxor xmm8, xmm10
  2725. pxor xmm7, xmm9
  2726. movdqa xmm12, xmm8
  2727. movdqa xmm13, xmm8
  2728. movdqa xmm14, xmm8
  2729. pslld xmm12, 31
  2730. pslld xmm13, 30
  2731. pslld xmm14, 25
  2732. pxor xmm12, xmm13
  2733. pxor xmm12, xmm14
  2734. movdqa xmm13, xmm12
  2735. psrldq xmm13, 4
  2736. pslldq xmm12, 12
  2737. pxor xmm8, xmm12
  2738. movdqa xmm14, xmm8
  2739. movdqa xmm10, xmm8
  2740. movdqa xmm9, xmm8
  2741. psrld xmm14, 1
  2742. psrld xmm10, 2
  2743. psrld xmm9, 7
  2744. pxor xmm14, xmm10
  2745. pxor xmm14, xmm9
  2746. pxor xmm14, xmm13
  2747. pxor xmm14, xmm8
  2748. pxor xmm7, xmm14
  2749. movdqu [rsp+112], xmm7
  2750. L_AES_GCM_decrypt_ghash_128:
  2751. lea rcx, QWORD PTR [rdi+rbx]
  2752. lea rdx, QWORD PTR [rsi+rbx]
  2753. movdqu xmm8, [rsp+128]
  2754. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  2755. movdqa xmm0, xmm8
  2756. pshufb xmm8, xmm1
  2757. movdqa xmm9, xmm0
  2758. paddd xmm9, OWORD PTR L_aes_gcm_one
  2759. pshufb xmm9, xmm1
  2760. movdqa xmm10, xmm0
  2761. paddd xmm10, OWORD PTR L_aes_gcm_two
  2762. pshufb xmm10, xmm1
  2763. movdqa xmm11, xmm0
  2764. paddd xmm11, OWORD PTR L_aes_gcm_three
  2765. pshufb xmm11, xmm1
  2766. movdqa xmm12, xmm0
  2767. paddd xmm12, OWORD PTR L_aes_gcm_four
  2768. pshufb xmm12, xmm1
  2769. movdqa xmm13, xmm0
  2770. paddd xmm13, OWORD PTR L_aes_gcm_five
  2771. pshufb xmm13, xmm1
  2772. movdqa xmm14, xmm0
  2773. paddd xmm14, OWORD PTR L_aes_gcm_six
  2774. pshufb xmm14, xmm1
  2775. movdqa xmm15, xmm0
  2776. paddd xmm15, OWORD PTR L_aes_gcm_seven
  2777. pshufb xmm15, xmm1
  2778. paddd xmm0, OWORD PTR L_aes_gcm_eight
  2779. movdqa xmm7, OWORD PTR [r15]
  2780. movdqu [rsp+128], xmm0
  2781. pxor xmm8, xmm7
  2782. pxor xmm9, xmm7
  2783. pxor xmm10, xmm7
  2784. pxor xmm11, xmm7
  2785. pxor xmm12, xmm7
  2786. pxor xmm13, xmm7
  2787. pxor xmm14, xmm7
  2788. pxor xmm15, xmm7
  2789. movdqu xmm7, [rsp+112]
  2790. movdqu xmm0, [rcx]
  2791. aesenc xmm8, [r15+16]
  2792. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2793. pxor xmm0, xmm2
  2794. pshufd xmm1, xmm7, 78
  2795. pshufd xmm5, xmm0, 78
  2796. pxor xmm1, xmm7
  2797. pxor xmm5, xmm0
  2798. movdqa xmm3, xmm0
  2799. pclmulqdq xmm3, xmm7, 17
  2800. aesenc xmm9, [r15+16]
  2801. aesenc xmm10, [r15+16]
  2802. movdqa xmm2, xmm0
  2803. pclmulqdq xmm2, xmm7, 0
  2804. aesenc xmm11, [r15+16]
  2805. aesenc xmm12, [r15+16]
  2806. pclmulqdq xmm1, xmm5, 0
  2807. aesenc xmm13, [r15+16]
  2808. aesenc xmm14, [r15+16]
  2809. aesenc xmm15, [r15+16]
  2810. pxor xmm1, xmm2
  2811. pxor xmm1, xmm3
  2812. movdqu xmm7, [rsp+96]
  2813. movdqu xmm0, [rcx+16]
  2814. pshufd xmm4, xmm7, 78
  2815. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2816. aesenc xmm8, [r15+32]
  2817. pxor xmm4, xmm7
  2818. pshufd xmm5, xmm0, 78
  2819. pxor xmm5, xmm0
  2820. movdqa xmm6, xmm0
  2821. pclmulqdq xmm6, xmm7, 17
  2822. aesenc xmm9, [r15+32]
  2823. aesenc xmm10, [r15+32]
  2824. pclmulqdq xmm7, xmm0, 0
  2825. aesenc xmm11, [r15+32]
  2826. aesenc xmm12, [r15+32]
  2827. pclmulqdq xmm4, xmm5, 0
  2828. aesenc xmm13, [r15+32]
  2829. aesenc xmm14, [r15+32]
  2830. aesenc xmm15, [r15+32]
  2831. pxor xmm1, xmm7
  2832. pxor xmm2, xmm7
  2833. pxor xmm1, xmm6
  2834. pxor xmm3, xmm6
  2835. pxor xmm1, xmm4
  2836. movdqu xmm7, [rsp+80]
  2837. movdqu xmm0, [rcx+32]
  2838. pshufd xmm4, xmm7, 78
  2839. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2840. aesenc xmm8, [r15+48]
  2841. pxor xmm4, xmm7
  2842. pshufd xmm5, xmm0, 78
  2843. pxor xmm5, xmm0
  2844. movdqa xmm6, xmm0
  2845. pclmulqdq xmm6, xmm7, 17
  2846. aesenc xmm9, [r15+48]
  2847. aesenc xmm10, [r15+48]
  2848. pclmulqdq xmm7, xmm0, 0
  2849. aesenc xmm11, [r15+48]
  2850. aesenc xmm12, [r15+48]
  2851. pclmulqdq xmm4, xmm5, 0
  2852. aesenc xmm13, [r15+48]
  2853. aesenc xmm14, [r15+48]
  2854. aesenc xmm15, [r15+48]
  2855. pxor xmm1, xmm7
  2856. pxor xmm2, xmm7
  2857. pxor xmm1, xmm6
  2858. pxor xmm3, xmm6
  2859. pxor xmm1, xmm4
  2860. movdqu xmm7, [rsp+64]
  2861. movdqu xmm0, [rcx+48]
  2862. pshufd xmm4, xmm7, 78
  2863. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2864. aesenc xmm8, [r15+64]
  2865. pxor xmm4, xmm7
  2866. pshufd xmm5, xmm0, 78
  2867. pxor xmm5, xmm0
  2868. movdqa xmm6, xmm0
  2869. pclmulqdq xmm6, xmm7, 17
  2870. aesenc xmm9, [r15+64]
  2871. aesenc xmm10, [r15+64]
  2872. pclmulqdq xmm7, xmm0, 0
  2873. aesenc xmm11, [r15+64]
  2874. aesenc xmm12, [r15+64]
  2875. pclmulqdq xmm4, xmm5, 0
  2876. aesenc xmm13, [r15+64]
  2877. aesenc xmm14, [r15+64]
  2878. aesenc xmm15, [r15+64]
  2879. pxor xmm1, xmm7
  2880. pxor xmm2, xmm7
  2881. pxor xmm1, xmm6
  2882. pxor xmm3, xmm6
  2883. pxor xmm1, xmm4
  2884. movdqu xmm7, [rsp+48]
  2885. movdqu xmm0, [rcx+64]
  2886. pshufd xmm4, xmm7, 78
  2887. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2888. aesenc xmm8, [r15+80]
  2889. pxor xmm4, xmm7
  2890. pshufd xmm5, xmm0, 78
  2891. pxor xmm5, xmm0
  2892. movdqa xmm6, xmm0
  2893. pclmulqdq xmm6, xmm7, 17
  2894. aesenc xmm9, [r15+80]
  2895. aesenc xmm10, [r15+80]
  2896. pclmulqdq xmm7, xmm0, 0
  2897. aesenc xmm11, [r15+80]
  2898. aesenc xmm12, [r15+80]
  2899. pclmulqdq xmm4, xmm5, 0
  2900. aesenc xmm13, [r15+80]
  2901. aesenc xmm14, [r15+80]
  2902. aesenc xmm15, [r15+80]
  2903. pxor xmm1, xmm7
  2904. pxor xmm2, xmm7
  2905. pxor xmm1, xmm6
  2906. pxor xmm3, xmm6
  2907. pxor xmm1, xmm4
  2908. movdqu xmm7, [rsp+32]
  2909. movdqu xmm0, [rcx+80]
  2910. pshufd xmm4, xmm7, 78
  2911. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2912. aesenc xmm8, [r15+96]
  2913. pxor xmm4, xmm7
  2914. pshufd xmm5, xmm0, 78
  2915. pxor xmm5, xmm0
  2916. movdqa xmm6, xmm0
  2917. pclmulqdq xmm6, xmm7, 17
  2918. aesenc xmm9, [r15+96]
  2919. aesenc xmm10, [r15+96]
  2920. pclmulqdq xmm7, xmm0, 0
  2921. aesenc xmm11, [r15+96]
  2922. aesenc xmm12, [r15+96]
  2923. pclmulqdq xmm4, xmm5, 0
  2924. aesenc xmm13, [r15+96]
  2925. aesenc xmm14, [r15+96]
  2926. aesenc xmm15, [r15+96]
  2927. pxor xmm1, xmm7
  2928. pxor xmm2, xmm7
  2929. pxor xmm1, xmm6
  2930. pxor xmm3, xmm6
  2931. pxor xmm1, xmm4
  2932. movdqu xmm7, [rsp+16]
  2933. movdqu xmm0, [rcx+96]
  2934. pshufd xmm4, xmm7, 78
  2935. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2936. aesenc xmm8, [r15+112]
  2937. pxor xmm4, xmm7
  2938. pshufd xmm5, xmm0, 78
  2939. pxor xmm5, xmm0
  2940. movdqa xmm6, xmm0
  2941. pclmulqdq xmm6, xmm7, 17
  2942. aesenc xmm9, [r15+112]
  2943. aesenc xmm10, [r15+112]
  2944. pclmulqdq xmm7, xmm0, 0
  2945. aesenc xmm11, [r15+112]
  2946. aesenc xmm12, [r15+112]
  2947. pclmulqdq xmm4, xmm5, 0
  2948. aesenc xmm13, [r15+112]
  2949. aesenc xmm14, [r15+112]
  2950. aesenc xmm15, [r15+112]
  2951. pxor xmm1, xmm7
  2952. pxor xmm2, xmm7
  2953. pxor xmm1, xmm6
  2954. pxor xmm3, xmm6
  2955. pxor xmm1, xmm4
  2956. movdqu xmm7, [rsp]
  2957. movdqu xmm0, [rcx+112]
  2958. pshufd xmm4, xmm7, 78
  2959. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  2960. aesenc xmm8, [r15+128]
  2961. pxor xmm4, xmm7
  2962. pshufd xmm5, xmm0, 78
  2963. pxor xmm5, xmm0
  2964. movdqa xmm6, xmm0
  2965. pclmulqdq xmm6, xmm7, 17
  2966. aesenc xmm9, [r15+128]
  2967. aesenc xmm10, [r15+128]
  2968. pclmulqdq xmm7, xmm0, 0
  2969. aesenc xmm11, [r15+128]
  2970. aesenc xmm12, [r15+128]
  2971. pclmulqdq xmm4, xmm5, 0
  2972. aesenc xmm13, [r15+128]
  2973. aesenc xmm14, [r15+128]
  2974. aesenc xmm15, [r15+128]
  2975. pxor xmm1, xmm7
  2976. pxor xmm2, xmm7
  2977. pxor xmm1, xmm6
  2978. pxor xmm3, xmm6
  2979. pxor xmm1, xmm4
  2980. movdqa xmm5, xmm1
  2981. psrldq xmm1, 8
  2982. pslldq xmm5, 8
  2983. aesenc xmm8, [r15+144]
  2984. pxor xmm2, xmm5
  2985. pxor xmm3, xmm1
  2986. movdqa xmm7, xmm2
  2987. movdqa xmm4, xmm2
  2988. movdqa xmm5, xmm2
  2989. aesenc xmm9, [r15+144]
  2990. pslld xmm7, 31
  2991. pslld xmm4, 30
  2992. pslld xmm5, 25
  2993. aesenc xmm10, [r15+144]
  2994. pxor xmm7, xmm4
  2995. pxor xmm7, xmm5
  2996. aesenc xmm11, [r15+144]
  2997. movdqa xmm4, xmm7
  2998. pslldq xmm7, 12
  2999. psrldq xmm4, 4
  3000. aesenc xmm12, [r15+144]
  3001. pxor xmm2, xmm7
  3002. movdqa xmm5, xmm2
  3003. movdqa xmm1, xmm2
  3004. movdqa xmm0, xmm2
  3005. aesenc xmm13, [r15+144]
  3006. psrld xmm5, 1
  3007. psrld xmm1, 2
  3008. psrld xmm0, 7
  3009. aesenc xmm14, [r15+144]
  3010. pxor xmm5, xmm1
  3011. pxor xmm5, xmm0
  3012. aesenc xmm15, [r15+144]
  3013. pxor xmm5, xmm4
  3014. pxor xmm2, xmm5
  3015. pxor xmm2, xmm3
  3016. cmp r10d, 11
  3017. movdqa xmm7, OWORD PTR [r15+160]
  3018. jl L_AES_GCM_decrypt_aesenc_128_ghash_avx_done
  3019. aesenc xmm8, xmm7
  3020. aesenc xmm9, xmm7
  3021. aesenc xmm10, xmm7
  3022. aesenc xmm11, xmm7
  3023. aesenc xmm12, xmm7
  3024. aesenc xmm13, xmm7
  3025. aesenc xmm14, xmm7
  3026. aesenc xmm15, xmm7
  3027. movdqa xmm7, OWORD PTR [r15+176]
  3028. aesenc xmm8, xmm7
  3029. aesenc xmm9, xmm7
  3030. aesenc xmm10, xmm7
  3031. aesenc xmm11, xmm7
  3032. aesenc xmm12, xmm7
  3033. aesenc xmm13, xmm7
  3034. aesenc xmm14, xmm7
  3035. aesenc xmm15, xmm7
  3036. cmp r10d, 13
  3037. movdqa xmm7, OWORD PTR [r15+192]
  3038. jl L_AES_GCM_decrypt_aesenc_128_ghash_avx_done
  3039. aesenc xmm8, xmm7
  3040. aesenc xmm9, xmm7
  3041. aesenc xmm10, xmm7
  3042. aesenc xmm11, xmm7
  3043. aesenc xmm12, xmm7
  3044. aesenc xmm13, xmm7
  3045. aesenc xmm14, xmm7
  3046. aesenc xmm15, xmm7
  3047. movdqa xmm7, OWORD PTR [r15+208]
  3048. aesenc xmm8, xmm7
  3049. aesenc xmm9, xmm7
  3050. aesenc xmm10, xmm7
  3051. aesenc xmm11, xmm7
  3052. aesenc xmm12, xmm7
  3053. aesenc xmm13, xmm7
  3054. aesenc xmm14, xmm7
  3055. aesenc xmm15, xmm7
  3056. movdqa xmm7, OWORD PTR [r15+224]
  3057. L_AES_GCM_decrypt_aesenc_128_ghash_avx_done:
  3058. aesenclast xmm8, xmm7
  3059. aesenclast xmm9, xmm7
  3060. movdqu xmm0, [rcx]
  3061. movdqu xmm1, [rcx+16]
  3062. pxor xmm8, xmm0
  3063. pxor xmm9, xmm1
  3064. movdqu [rdx], xmm8
  3065. movdqu [rdx+16], xmm9
  3066. aesenclast xmm10, xmm7
  3067. aesenclast xmm11, xmm7
  3068. movdqu xmm0, [rcx+32]
  3069. movdqu xmm1, [rcx+48]
  3070. pxor xmm10, xmm0
  3071. pxor xmm11, xmm1
  3072. movdqu [rdx+32], xmm10
  3073. movdqu [rdx+48], xmm11
  3074. aesenclast xmm12, xmm7
  3075. aesenclast xmm13, xmm7
  3076. movdqu xmm0, [rcx+64]
  3077. movdqu xmm1, [rcx+80]
  3078. pxor xmm12, xmm0
  3079. pxor xmm13, xmm1
  3080. movdqu [rdx+64], xmm12
  3081. movdqu [rdx+80], xmm13
  3082. aesenclast xmm14, xmm7
  3083. aesenclast xmm15, xmm7
  3084. movdqu xmm0, [rcx+96]
  3085. movdqu xmm1, [rcx+112]
  3086. pxor xmm14, xmm0
  3087. pxor xmm15, xmm1
  3088. movdqu [rdx+96], xmm14
  3089. movdqu [rdx+112], xmm15
  3090. add ebx, 128
  3091. cmp ebx, r13d
  3092. jl L_AES_GCM_decrypt_ghash_128
  3093. movdqa xmm6, xmm2
  3094. movdqu xmm5, [rsp]
  3095. L_AES_GCM_decrypt_done_128:
  3096. mov edx, r9d
  3097. cmp ebx, edx
  3098. jge L_AES_GCM_decrypt_done_dec
  3099. mov r13d, r9d
  3100. and r13d, 4294967280
  3101. cmp ebx, r13d
  3102. jge L_AES_GCM_decrypt_last_block_done
  3103. L_AES_GCM_decrypt_last_block_start:
  3104. lea rcx, QWORD PTR [rdi+rbx]
  3105. lea rdx, QWORD PTR [rsi+rbx]
  3106. movdqu xmm1, [rcx]
  3107. movdqa xmm0, xmm5
  3108. pshufb xmm1, OWORD PTR L_aes_gcm_bswap_mask
  3109. pxor xmm1, xmm6
  3110. movdqu xmm8, [rsp+128]
  3111. movdqa xmm9, xmm8
  3112. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  3113. paddd xmm9, OWORD PTR L_aes_gcm_one
  3114. pxor xmm8, [r15]
  3115. movdqu [rsp+128], xmm9
  3116. movdqa xmm10, xmm1
  3117. pclmulqdq xmm10, xmm0, 16
  3118. aesenc xmm8, [r15+16]
  3119. aesenc xmm8, [r15+32]
  3120. movdqa xmm11, xmm1
  3121. pclmulqdq xmm11, xmm0, 1
  3122. aesenc xmm8, [r15+48]
  3123. aesenc xmm8, [r15+64]
  3124. movdqa xmm12, xmm1
  3125. pclmulqdq xmm12, xmm0, 0
  3126. aesenc xmm8, [r15+80]
  3127. movdqa xmm1, xmm1
  3128. pclmulqdq xmm1, xmm0, 17
  3129. aesenc xmm8, [r15+96]
  3130. pxor xmm10, xmm11
  3131. movdqa xmm2, xmm10
  3132. psrldq xmm10, 8
  3133. pslldq xmm2, 8
  3134. aesenc xmm8, [r15+112]
  3135. movdqa xmm3, xmm1
  3136. pxor xmm2, xmm12
  3137. pxor xmm3, xmm10
  3138. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  3139. movdqa xmm11, xmm2
  3140. pclmulqdq xmm11, xmm0, 16
  3141. aesenc xmm8, [r15+128]
  3142. pshufd xmm10, xmm2, 78
  3143. pxor xmm10, xmm11
  3144. movdqa xmm11, xmm10
  3145. pclmulqdq xmm11, xmm0, 16
  3146. aesenc xmm8, [r15+144]
  3147. pshufd xmm6, xmm10, 78
  3148. pxor xmm6, xmm11
  3149. pxor xmm6, xmm3
  3150. cmp r10d, 11
  3151. movdqa xmm9, OWORD PTR [r15+160]
  3152. jl L_AES_GCM_decrypt_aesenc_gfmul_last
  3153. aesenc xmm8, xmm9
  3154. aesenc xmm8, [r15+176]
  3155. cmp r10d, 13
  3156. movdqa xmm9, OWORD PTR [r15+192]
  3157. jl L_AES_GCM_decrypt_aesenc_gfmul_last
  3158. aesenc xmm8, xmm9
  3159. aesenc xmm8, [r15+208]
  3160. movdqa xmm9, OWORD PTR [r15+224]
  3161. L_AES_GCM_decrypt_aesenc_gfmul_last:
  3162. aesenclast xmm8, xmm9
  3163. movdqu xmm9, [rcx]
  3164. pxor xmm8, xmm9
  3165. movdqu [rdx], xmm8
  3166. add ebx, 16
  3167. cmp ebx, r13d
  3168. jl L_AES_GCM_decrypt_last_block_start
  3169. L_AES_GCM_decrypt_last_block_done:
  3170. mov ecx, r9d
  3171. mov edx, ecx
  3172. and ecx, 15
  3173. jz L_AES_GCM_decrypt_aesenc_last15_dec_avx_done
  3174. movdqu xmm4, [rsp+128]
  3175. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
  3176. pxor xmm4, [r15]
  3177. aesenc xmm4, [r15+16]
  3178. aesenc xmm4, [r15+32]
  3179. aesenc xmm4, [r15+48]
  3180. aesenc xmm4, [r15+64]
  3181. aesenc xmm4, [r15+80]
  3182. aesenc xmm4, [r15+96]
  3183. aesenc xmm4, [r15+112]
  3184. aesenc xmm4, [r15+128]
  3185. aesenc xmm4, [r15+144]
  3186. cmp r10d, 11
  3187. movdqa xmm9, OWORD PTR [r15+160]
  3188. jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last
  3189. aesenc xmm4, xmm9
  3190. aesenc xmm4, [r15+176]
  3191. cmp r10d, 13
  3192. movdqa xmm9, OWORD PTR [r15+192]
  3193. jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last
  3194. aesenc xmm4, xmm9
  3195. aesenc xmm4, [r15+208]
  3196. movdqa xmm9, OWORD PTR [r15+224]
  3197. L_AES_GCM_decrypt_aesenc_last15_dec_avx_aesenc_avx_last:
  3198. aesenclast xmm4, xmm9
  3199. sub rsp, 32
  3200. xor ecx, ecx
  3201. movdqu [rsp], xmm4
  3202. pxor xmm0, xmm0
  3203. movdqu [rsp+16], xmm0
  3204. L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop:
  3205. movzx r13d, BYTE PTR [rdi+rbx]
  3206. mov BYTE PTR [rsp+rcx+16], r13b
  3207. xor r13b, BYTE PTR [rsp+rcx]
  3208. mov BYTE PTR [rsi+rbx], r13b
  3209. inc ebx
  3210. inc ecx
  3211. cmp ebx, edx
  3212. jl L_AES_GCM_decrypt_aesenc_last15_dec_avx_loop
  3213. movdqu xmm4, [rsp+16]
  3214. add rsp, 32
  3215. pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
  3216. pxor xmm6, xmm4
  3217. pshufd xmm9, xmm5, 78
  3218. pshufd xmm10, xmm6, 78
  3219. movdqa xmm11, xmm6
  3220. movdqa xmm8, xmm6
  3221. pclmulqdq xmm11, xmm5, 17
  3222. pclmulqdq xmm8, xmm5, 0
  3223. pxor xmm9, xmm5
  3224. pxor xmm10, xmm6
  3225. pclmulqdq xmm9, xmm10, 0
  3226. pxor xmm9, xmm8
  3227. pxor xmm9, xmm11
  3228. movdqa xmm10, xmm9
  3229. movdqa xmm6, xmm11
  3230. pslldq xmm10, 8
  3231. psrldq xmm9, 8
  3232. pxor xmm8, xmm10
  3233. pxor xmm6, xmm9
  3234. movdqa xmm12, xmm8
  3235. movdqa xmm13, xmm8
  3236. movdqa xmm14, xmm8
  3237. pslld xmm12, 31
  3238. pslld xmm13, 30
  3239. pslld xmm14, 25
  3240. pxor xmm12, xmm13
  3241. pxor xmm12, xmm14
  3242. movdqa xmm13, xmm12
  3243. psrldq xmm13, 4
  3244. pslldq xmm12, 12
  3245. pxor xmm8, xmm12
  3246. movdqa xmm14, xmm8
  3247. movdqa xmm10, xmm8
  3248. movdqa xmm9, xmm8
  3249. psrld xmm14, 1
  3250. psrld xmm10, 2
  3251. psrld xmm9, 7
  3252. pxor xmm14, xmm10
  3253. pxor xmm14, xmm9
  3254. pxor xmm14, xmm13
  3255. pxor xmm14, xmm8
  3256. pxor xmm6, xmm14
  3257. L_AES_GCM_decrypt_aesenc_last15_dec_avx_done:
  3258. L_AES_GCM_decrypt_done_dec:
  3259. mov edx, r9d
  3260. mov ecx, r11d
  3261. shl rdx, 3
  3262. shl rcx, 3
  3263. pinsrq xmm0, rdx, 0
  3264. pinsrq xmm0, rcx, 1
  3265. pxor xmm6, xmm0
  3266. pshufd xmm9, xmm5, 78
  3267. pshufd xmm10, xmm6, 78
  3268. movdqa xmm11, xmm6
  3269. movdqa xmm8, xmm6
  3270. pclmulqdq xmm11, xmm5, 17
  3271. pclmulqdq xmm8, xmm5, 0
  3272. pxor xmm9, xmm5
  3273. pxor xmm10, xmm6
  3274. pclmulqdq xmm9, xmm10, 0
  3275. pxor xmm9, xmm8
  3276. pxor xmm9, xmm11
  3277. movdqa xmm10, xmm9
  3278. movdqa xmm6, xmm11
  3279. pslldq xmm10, 8
  3280. psrldq xmm9, 8
  3281. pxor xmm8, xmm10
  3282. pxor xmm6, xmm9
  3283. movdqa xmm12, xmm8
  3284. movdqa xmm13, xmm8
  3285. movdqa xmm14, xmm8
  3286. pslld xmm12, 31
  3287. pslld xmm13, 30
  3288. pslld xmm14, 25
  3289. pxor xmm12, xmm13
  3290. pxor xmm12, xmm14
  3291. movdqa xmm13, xmm12
  3292. psrldq xmm13, 4
  3293. pslldq xmm12, 12
  3294. pxor xmm8, xmm12
  3295. movdqa xmm14, xmm8
  3296. movdqa xmm10, xmm8
  3297. movdqa xmm9, xmm8
  3298. psrld xmm14, 1
  3299. psrld xmm10, 2
  3300. psrld xmm9, 7
  3301. pxor xmm14, xmm10
  3302. pxor xmm14, xmm9
  3303. pxor xmm14, xmm13
  3304. pxor xmm14, xmm8
  3305. pxor xmm6, xmm14
  3306. pshufb xmm6, OWORD PTR L_aes_gcm_bswap_mask
  3307. movdqu xmm0, [rsp+144]
  3308. pxor xmm0, xmm6
  3309. cmp r14d, 16
  3310. je L_AES_GCM_decrypt_cmp_tag_16
  3311. sub rsp, 16
  3312. xor rcx, rcx
  3313. xor rbx, rbx
  3314. movdqu [rsp], xmm0
  3315. L_AES_GCM_decrypt_cmp_tag_loop:
  3316. movzx r13d, BYTE PTR [rsp+rcx]
  3317. xor r13b, BYTE PTR [r8+rcx]
  3318. or bl, r13b
  3319. inc ecx
  3320. cmp ecx, r14d
  3321. jne L_AES_GCM_decrypt_cmp_tag_loop
  3322. cmp rbx, 0
  3323. sete bl
  3324. add rsp, 16
  3325. xor rcx, rcx
  3326. jmp L_AES_GCM_decrypt_cmp_tag_done
  3327. L_AES_GCM_decrypt_cmp_tag_16:
  3328. movdqu xmm1, [r8]
  3329. pcmpeqb xmm0, xmm1
  3330. pmovmskb rdx, xmm0
  3331. ; If edx == 0xFFFF (all 16 byte-compare mask bits set) return 1, otherwise return 0
  3332. xor ebx, ebx
  3333. cmp edx, 65535
  3334. sete bl
  3335. L_AES_GCM_decrypt_cmp_tag_done:
  3336. mov DWORD PTR [rbp], ebx
  3337. add rsp, 168
  3338. pop rbp
  3339. pop r15
  3340. pop r14
  3341. pop rbx
  3342. pop r12
  3343. pop rsi
  3344. pop rdi
  3345. pop r13
  3346. ret
  3347. AES_GCM_decrypt ENDP
  3348. _text ENDS
  3349. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------------
  ; AES_GCM_init_aesni
  ;
  ; Computes the GHASH key H, the encrypted initial counter block and the next
  ; counter value from an expanded AES key and an IV.
  ;
  ; ABI:  Microsoft x64.
  ; In:   rcx      -> expanded AES key schedule (read with movdqa / aesenc
  ;                   memory operands, so it must be 16-byte aligned)
  ;       edx      =  AES round count; < 11 stops after round key 10,
  ;                   < 13 after round key 12, otherwise round key 14 is last
  ;       r8       -> IV bytes
  ;       r9d      =  IV length in bytes
  ;       [rsp+40] -> 16-byte aligned buffer receiving H = AES(key, 0) after
  ;                   pshufb with L_aes_gcm_bswap_mask
  ;       [rsp+48] -> 16-byte aligned buffer receiving the counter block after
  ;                   pshufb with L_aes_gcm_bswap_epi64 and paddd L_aes_gcm_one
  ;       [rsp+56] -> 16-byte aligned buffer receiving AES(key, counter block)
  ;       (stack offsets are relative to rsp at entry)
  ; Out:  the three buffers above; no register result.
  ; Clobbers: rax, rcx, rdx, r8-r11, xmm0-xmm5, flags.
  ; Preserves: rdi, rsi, r12-r14 and xmm7, xmm8, xmm9, xmm15.
  ;
  ; Fix relative to the previous revision: xmm7/xmm8/xmm9/xmm15 are nonvolatile
  ; in the Microsoft x64 convention but were overwritten without being saved.
  ; They are now spilled to the local frame and restored before returning.
  ;
  ; Frame after "sub rsp, 80":
  ;       [rsp+0 .. rsp+15]   staging buffer for a trailing partial IV block
  ;       [rsp+16]            saved xmm7
  ;       [rsp+32]            saved xmm8
  ;       [rsp+48]            saved xmm9
  ;       [rsp+64]            saved xmm15
  ;
  ; NOTE(review): length comparisons use signed jumps (jl) and the bit length
  ; is formed with a 32-bit shift, exactly as before; very large IV lengths are
  ; therefore not handled - confirm the caller's range.
  ; NOTE(review): rsp is adjusted without PROC FRAME unwind directives, as this
  ; procedure already did before the change.
  ; ---------------------------------------------------------------------------
  AES_GCM_init_aesni PROC
          push rdi
          push rsi
          push r12
          push r13
          push r14
          mov rdi, rcx                            ; rdi -> key schedule
          mov rsi, rdx                            ; esi = round count
          mov r10, r8                             ; r10 -> IV
          mov r11d, r9d                           ; r11d = IV length
          ; 5 pushes (40) + return address (8) + shadow space (32) = 80
          mov rax, QWORD PTR [rsp+80]             ; rax -> H output
          mov r8, QWORD PTR [rsp+88]              ; r8  -> counter output
          mov r9, QWORD PTR [rsp+96]              ; r9  -> encrypted counter output
          sub rsp, 80
          ; Spill the nonvolatile XMM registers this procedure overwrites.
          movdqu OWORD PTR [rsp+16], xmm7
          movdqu OWORD PTR [rsp+32], xmm8
          movdqu OWORD PTR [rsp+48], xmm9
          movdqu OWORD PTR [rsp+64], xmm15
          pxor xmm4, xmm4
          mov edx, r11d
          cmp edx, 12
          jne L_AES_GCM_init_aesni_iv_not_12
          ; # Calculate values when IV is 12 bytes
          ; Set counter based on IV: IV[0..11] followed by dword 0x01000000
          mov ecx, 16777216
          pinsrq xmm4, QWORD PTR [r10], 0
          pinsrd xmm4, DWORD PTR [r10+8], 2
          pinsrd xmm4, ecx, 3
          ; H = Encrypt X(=0) and T = Encrypt counter
          ; xmm5 starts as round key 0 (i.e. 0 xor rk0), xmm1 as counter xor rk0.
          movdqa xmm1, xmm4
          movdqa xmm5, OWORD PTR [rdi]
          pxor xmm1, xmm5
          movdqa xmm7, OWORD PTR [rdi+16]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+32]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+48]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+64]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+80]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+96]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+112]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+128]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+144]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          ; movdqa does not touch flags, so the load may sit between cmp and jl.
          cmp esi, 11
          movdqa xmm7, OWORD PTR [rdi+160]
          jl L_AES_GCM_init_aesni_calc_iv_12_last
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+176]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          cmp esi, 13
          movdqa xmm7, OWORD PTR [rdi+192]
          jl L_AES_GCM_init_aesni_calc_iv_12_last
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+208]
          aesenc xmm5, xmm7
          aesenc xmm1, xmm7
          movdqa xmm7, OWORD PTR [rdi+224]
  L_AES_GCM_init_aesni_calc_iv_12_last:
          aesenclast xmm5, xmm7
          aesenclast xmm1, xmm7
          pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
          movdqu xmm15, xmm1                      ; xmm15 = encrypted counter
          jmp L_AES_GCM_init_aesni_iv_done
  L_AES_GCM_init_aesni_iv_not_12:
          ; Calculate values when IV is not 12 bytes
          ; H = Encrypt X(=0)
          movdqa xmm5, OWORD PTR [rdi]
          aesenc xmm5, [rdi+16]
          aesenc xmm5, [rdi+32]
          aesenc xmm5, [rdi+48]
          aesenc xmm5, [rdi+64]
          aesenc xmm5, [rdi+80]
          aesenc xmm5, [rdi+96]
          aesenc xmm5, [rdi+112]
          aesenc xmm5, [rdi+128]
          aesenc xmm5, [rdi+144]
          cmp esi, 11
          movdqa xmm9, OWORD PTR [rdi+160]
          jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last
          aesenc xmm5, xmm9
          aesenc xmm5, [rdi+176]
          cmp esi, 13
          movdqa xmm9, OWORD PTR [rdi+192]
          jl L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last
          aesenc xmm5, xmm9
          aesenc xmm5, [rdi+208]
          movdqa xmm9, OWORD PTR [rdi+224]
  L_AES_GCM_init_aesni_calc_iv_1_aesenc_avx_last:
          aesenclast xmm5, xmm9
          pshufb xmm5, OWORD PTR L_aes_gcm_bswap_mask
          ; Calc counter
          ; Initialization vector
          ; "mov rcx, 0" (not xor) is deliberate: it keeps the flags from the
          ; preceding cmp alive for the je below.
          cmp edx, 0
          mov rcx, 0
          je L_AES_GCM_init_aesni_calc_iv_done
          cmp edx, 16
          jl L_AES_GCM_init_aesni_calc_iv_lt16
          and edx, 4294967280                     ; edx = length rounded down to 16
  L_AES_GCM_init_aesni_calc_iv_16_loop:
          ; xmm4 = (xmm4 xor next 16 IV bytes) * H
          movdqu xmm8, [r10+rcx]
          pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
          pxor xmm4, xmm8
          ; Karatsuba carry-less multiply of xmm4 by xmm5
          ; (pshufd imm 78 swaps the two 64-bit halves)
          pshufd xmm1, xmm4, 78
          pshufd xmm2, xmm5, 78
          movdqa xmm3, xmm5
          movdqa xmm0, xmm5
          pclmulqdq xmm3, xmm4, 17                ; high * high
          pclmulqdq xmm0, xmm4, 0                 ; low * low
          pxor xmm1, xmm4
          pxor xmm2, xmm5
          pclmulqdq xmm1, xmm2, 0                 ; (hi^lo) * (hi^lo)
          pxor xmm1, xmm0
          pxor xmm1, xmm3
          movdqa xmm2, xmm1
          movdqa xmm7, xmm0
          movdqa xmm4, xmm3
          pslldq xmm2, 8
          psrldq xmm1, 8
          pxor xmm7, xmm2                         ; xmm7 = low 128 bits of product
          pxor xmm4, xmm1                         ; xmm4 = high 128 bits of product
          ; Shift the 256-bit product xmm4:xmm7 left by one bit
          movdqa xmm0, xmm7
          movdqa xmm1, xmm4
          psrld xmm0, 31
          psrld xmm1, 31
          pslld xmm7, 1
          pslld xmm4, 1
          movdqa xmm2, xmm0
          pslldq xmm0, 4
          psrldq xmm2, 12
          pslldq xmm1, 4
          por xmm4, xmm2
          por xmm7, xmm0
          por xmm4, xmm1
          ; Fold the low half into the high half (shift sets 31/30/25 and 1/2/7)
          movdqa xmm0, xmm7
          movdqa xmm1, xmm7
          movdqa xmm2, xmm7
          pslld xmm0, 31
          pslld xmm1, 30
          pslld xmm2, 25
          pxor xmm0, xmm1
          pxor xmm0, xmm2
          movdqa xmm1, xmm0
          psrldq xmm1, 4
          pslldq xmm0, 12
          pxor xmm7, xmm0
          movdqa xmm2, xmm7
          movdqa xmm3, xmm7
          movdqa xmm0, xmm7
          psrld xmm2, 1
          psrld xmm3, 2
          psrld xmm0, 7
          pxor xmm2, xmm3
          pxor xmm2, xmm0
          pxor xmm2, xmm1
          pxor xmm2, xmm7
          pxor xmm4, xmm2
          add ecx, 16
          cmp ecx, edx
          jl L_AES_GCM_init_aesni_calc_iv_16_loop
          mov edx, r11d                           ; restore the full IV length
          cmp ecx, edx
          je L_AES_GCM_init_aesni_calc_iv_done
  L_AES_GCM_init_aesni_calc_iv_lt16:
          ; Copy the trailing bytes into the zeroed staging slot at [rsp].
          ; The slot is the low 16 bytes of the frame; the saved XMM registers
          ; start at [rsp+16] and are not touched.
          pxor xmm8, xmm8
          xor r13d, r13d
          movdqu [rsp], xmm8
  L_AES_GCM_init_aesni_calc_iv_loop:
          movzx r12d, BYTE PTR [r10+rcx]
          mov BYTE PTR [rsp+r13], r12b
          inc ecx
          inc r13d
          cmp ecx, edx
          jl L_AES_GCM_init_aesni_calc_iv_loop
          movdqu xmm8, [rsp]
          pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
          pxor xmm4, xmm8
          ; xmm4 = xmm4 * H (same multiply / shift / fold sequence as above)
          pshufd xmm1, xmm4, 78
          pshufd xmm2, xmm5, 78
          movdqa xmm3, xmm5
          movdqa xmm0, xmm5
          pclmulqdq xmm3, xmm4, 17
          pclmulqdq xmm0, xmm4, 0
          pxor xmm1, xmm4
          pxor xmm2, xmm5
          pclmulqdq xmm1, xmm2, 0
          pxor xmm1, xmm0
          pxor xmm1, xmm3
          movdqa xmm2, xmm1
          movdqa xmm7, xmm0
          movdqa xmm4, xmm3
          pslldq xmm2, 8
          psrldq xmm1, 8
          pxor xmm7, xmm2
          pxor xmm4, xmm1
          movdqa xmm0, xmm7
          movdqa xmm1, xmm4
          psrld xmm0, 31
          psrld xmm1, 31
          pslld xmm7, 1
          pslld xmm4, 1
          movdqa xmm2, xmm0
          pslldq xmm0, 4
          psrldq xmm2, 12
          pslldq xmm1, 4
          por xmm4, xmm2
          por xmm7, xmm0
          por xmm4, xmm1
          movdqa xmm0, xmm7
          movdqa xmm1, xmm7
          movdqa xmm2, xmm7
          pslld xmm0, 31
          pslld xmm1, 30
          pslld xmm2, 25
          pxor xmm0, xmm1
          pxor xmm0, xmm2
          movdqa xmm1, xmm0
          psrldq xmm1, 4
          pslldq xmm0, 12
          pxor xmm7, xmm0
          movdqa xmm2, xmm7
          movdqa xmm3, xmm7
          movdqa xmm0, xmm7
          psrld xmm2, 1
          psrld xmm3, 2
          psrld xmm0, 7
          pxor xmm2, xmm3
          pxor xmm2, xmm0
          pxor xmm2, xmm1
          pxor xmm2, xmm7
          pxor xmm4, xmm2
  L_AES_GCM_init_aesni_calc_iv_done:
          ; T = Encrypt counter
          ; Mix in the IV length in bits (edx * 8) as the low qword, then
          ; multiply by H one more time.
          pxor xmm0, xmm0
          shl edx, 3
          pinsrq xmm0, rdx, 0
          pxor xmm4, xmm0
          pshufd xmm1, xmm4, 78
          pshufd xmm2, xmm5, 78
          movdqa xmm3, xmm5
          movdqa xmm0, xmm5
          pclmulqdq xmm3, xmm4, 17
          pclmulqdq xmm0, xmm4, 0
          pxor xmm1, xmm4
          pxor xmm2, xmm5
          pclmulqdq xmm1, xmm2, 0
          pxor xmm1, xmm0
          pxor xmm1, xmm3
          movdqa xmm2, xmm1
          movdqa xmm7, xmm0
          movdqa xmm4, xmm3
          pslldq xmm2, 8
          psrldq xmm1, 8
          pxor xmm7, xmm2
          pxor xmm4, xmm1
          movdqa xmm0, xmm7
          movdqa xmm1, xmm4
          psrld xmm0, 31
          psrld xmm1, 31
          pslld xmm7, 1
          pslld xmm4, 1
          movdqa xmm2, xmm0
          pslldq xmm0, 4
          psrldq xmm2, 12
          pslldq xmm1, 4
          por xmm4, xmm2
          por xmm7, xmm0
          por xmm4, xmm1
          movdqa xmm0, xmm7
          movdqa xmm1, xmm7
          movdqa xmm2, xmm7
          pslld xmm0, 31
          pslld xmm1, 30
          pslld xmm2, 25
          pxor xmm0, xmm1
          pxor xmm0, xmm2
          movdqa xmm1, xmm0
          psrldq xmm1, 4
          pslldq xmm0, 12
          pxor xmm7, xmm0
          movdqa xmm2, xmm7
          movdqa xmm3, xmm7
          movdqa xmm0, xmm7
          psrld xmm2, 1
          psrld xmm3, 2
          psrld xmm0, 7
          pxor xmm2, xmm3
          pxor xmm2, xmm0
          pxor xmm2, xmm1
          pxor xmm2, xmm7
          pxor xmm4, xmm2
          pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
          ; Encrypt counter
          movdqa xmm8, OWORD PTR [rdi]
          pxor xmm8, xmm4
          aesenc xmm8, [rdi+16]
          aesenc xmm8, [rdi+32]
          aesenc xmm8, [rdi+48]
          aesenc xmm8, [rdi+64]
          aesenc xmm8, [rdi+80]
          aesenc xmm8, [rdi+96]
          aesenc xmm8, [rdi+112]
          aesenc xmm8, [rdi+128]
          aesenc xmm8, [rdi+144]
          cmp esi, 11
          movdqa xmm9, OWORD PTR [rdi+160]
          jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last
          aesenc xmm8, xmm9
          aesenc xmm8, [rdi+176]
          cmp esi, 13
          movdqa xmm9, OWORD PTR [rdi+192]
          jl L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last
          aesenc xmm8, xmm9
          aesenc xmm8, [rdi+208]
          movdqa xmm9, OWORD PTR [rdi+224]
  L_AES_GCM_init_aesni_calc_iv_2_aesenc_avx_last:
          aesenclast xmm8, xmm9
          movdqu xmm15, xmm8                      ; xmm15 = encrypted counter
  L_AES_GCM_init_aesni_iv_done:
          ; Publish results: encrypted counter, next counter value, H.
          movdqa OWORD PTR [r9], xmm15
          pshufb xmm4, OWORD PTR L_aes_gcm_bswap_epi64
          paddd xmm4, OWORD PTR L_aes_gcm_one
          movdqa OWORD PTR [rax], xmm5
          movdqa OWORD PTR [r8], xmm4
          ; Restore the nonvolatile XMM registers and tear down the frame.
          movdqu xmm7, OWORD PTR [rsp+16]
          movdqu xmm8, OWORD PTR [rsp+32]
          movdqu xmm9, OWORD PTR [rsp+48]
          movdqu xmm15, OWORD PTR [rsp+64]
          add rsp, 80
          pop r14
          pop r13
          pop r12
          pop rsi
          pop rdi
          ret
  AES_GCM_init_aesni ENDP
  3698. _text ENDS
  3699. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------------
  ; AES_GCM_aad_update_aesni
  ;
  ; Folds a run of 16-byte blocks into a GHASH accumulator:
  ;     for each block:  acc = (acc xor shuffle(block)) * H
  ; where shuffle is pshufb with L_aes_gcm_bswap_mask.
  ;
  ; ABI:  Microsoft x64.
  ; In:   rcx -> input bytes (unaligned loads)
  ;       edx =  input length in bytes; the loop consumes whole 16-byte blocks
  ;              until the offset reaches edx, so a length that is not a
  ;              multiple of 16 still reads a full final block
  ;       r8  -> accumulator, 16-byte aligned (movdqa), updated in place
  ;       r9  -> H, 16-byte aligned (movdqa), read only
  ; Clobbers: rax, rcx, xmm0-xmm5, flags.  Preserves xmm6.
  ;
  ; Fixes relative to the previous revision:
  ;   * xmm6 and xmm8 are nonvolatile in the Microsoft x64 convention but were
  ;     overwritten without being saved.  The data block now lives in xmm0
  ;     (dead at the top of every iteration) and xmm6 is spilled and restored.
  ;   * A length of zero used to process (and read) one 16-byte block because
  ;     the loop tests its condition only at the bottom; it now returns with
  ;     the accumulator untouched.
  ;
  ; NOTE(review): rsp is adjusted without PROC FRAME unwind directives, in the
  ; same way the other stack-using procedures visible in this file do.
  ; ---------------------------------------------------------------------------
  AES_GCM_aad_update_aesni PROC
          test edx, edx
          jz L_AES_GCM_aad_update_aesni_done     ; nothing to hash
          mov rax, rcx                            ; rax -> input, rcx becomes the offset
          sub rsp, 16
          movdqu OWORD PTR [rsp], xmm6            ; spill nonvolatile xmm6
          movdqa xmm5, OWORD PTR [r8]             ; xmm5 = accumulator
          movdqa xmm6, OWORD PTR [r9]             ; xmm6 = H
          xor ecx, ecx
  L_AES_GCM_aad_update_aesni_16_loop:
          movdqu xmm0, [rax+rcx]
          pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
          pxor xmm5, xmm0
          ; Karatsuba carry-less multiply of xmm5 by xmm6
          ; (pshufd imm 78 swaps the two 64-bit halves)
          pshufd xmm1, xmm5, 78
          pshufd xmm2, xmm6, 78
          movdqa xmm3, xmm6
          movdqa xmm0, xmm6
          pclmulqdq xmm3, xmm5, 17                ; high * high
          pclmulqdq xmm0, xmm5, 0                 ; low * low
          pxor xmm1, xmm5
          pxor xmm2, xmm6
          pclmulqdq xmm1, xmm2, 0                 ; (hi^lo) * (hi^lo)
          pxor xmm1, xmm0
          pxor xmm1, xmm3
          movdqa xmm2, xmm1
          movdqa xmm4, xmm0
          movdqa xmm5, xmm3
          pslldq xmm2, 8
          psrldq xmm1, 8
          pxor xmm4, xmm2                         ; xmm4 = low 128 bits of product
          pxor xmm5, xmm1                         ; xmm5 = high 128 bits of product
          ; Shift the 256-bit product xmm5:xmm4 left by one bit
          movdqa xmm0, xmm4
          movdqa xmm1, xmm5
          psrld xmm0, 31
          psrld xmm1, 31
          pslld xmm4, 1
          pslld xmm5, 1
          movdqa xmm2, xmm0
          pslldq xmm0, 4
          psrldq xmm2, 12
          pslldq xmm1, 4
          por xmm5, xmm2
          por xmm4, xmm0
          por xmm5, xmm1
          ; Fold the low half into the high half (shift sets 31/30/25 and 1/2/7)
          movdqa xmm0, xmm4
          movdqa xmm1, xmm4
          movdqa xmm2, xmm4
          pslld xmm0, 31
          pslld xmm1, 30
          pslld xmm2, 25
          pxor xmm0, xmm1
          pxor xmm0, xmm2
          movdqa xmm1, xmm0
          psrldq xmm1, 4
          pslldq xmm0, 12
          pxor xmm4, xmm0
          movdqa xmm2, xmm4
          movdqa xmm3, xmm4
          movdqa xmm0, xmm4
          psrld xmm2, 1
          psrld xmm3, 2
          psrld xmm0, 7
          pxor xmm2, xmm3
          pxor xmm2, xmm0
          pxor xmm2, xmm1
          pxor xmm2, xmm4
          pxor xmm5, xmm2
          add ecx, 16
          cmp ecx, edx
          jl L_AES_GCM_aad_update_aesni_16_loop
          movdqa OWORD PTR [r8], xmm5             ; write the accumulator back
          movdqu xmm6, OWORD PTR [rsp]            ; restore nonvolatile xmm6
          add rsp, 16
  L_AES_GCM_aad_update_aesni_done:
          ret
  AES_GCM_aad_update_aesni ENDP
  3769. _text ENDS
  3770. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------------
  ; AES_GCM_encrypt_block_aesni
  ;
  ; Encrypts one 16-byte block in counter mode and advances the counter:
  ;     out      = in xor AES(key, pshufb(counter, L_aes_gcm_bswap_epi64))
  ;     counter += L_aes_gcm_one   (paddd, stored back unshuffled)
  ;
  ; ABI:  Microsoft x64.
  ; In:   rcx      -> expanded AES key schedule (used as aesenc / pxor memory
  ;                   operands and movdqa loads, so it must be 16-byte aligned)
  ;       edx      =  AES round count; < 11 stops after round key 10,
  ;                   < 13 after round key 12, otherwise round key 14 is last
  ;       r8       -> output block (unaligned store)
  ;       r9       -> input block (unaligned load); it is read before the
  ;                   output is written
  ;       [rsp+40] -> counter block, read and written with unaligned moves
  ; Clobbers: rax, xmm0, xmm1, flags.
  ;
  ; Fix relative to the previous revision: the working registers were xmm8 and
  ; xmm9, which are nonvolatile in the Microsoft x64 convention and were not
  ; saved.  The routine now uses the volatile xmm0/xmm1 instead, so it needs no
  ; stack frame.  The copies of r8/r9 into r10/r11 were redundant and are gone.
  ; ---------------------------------------------------------------------------
  AES_GCM_encrypt_block_aesni PROC
          mov rax, QWORD PTR [rsp+40]             ; rax -> counter block
          movdqu xmm0, [rax]
          movdqa xmm1, xmm0
          pshufb xmm0, OWORD PTR L_aes_gcm_bswap_epi64
          paddd xmm1, OWORD PTR L_aes_gcm_one
          pxor xmm0, [rcx]                        ; round key 0
          movdqu [rax], xmm1                      ; store the advanced counter
          aesenc xmm0, [rcx+16]
          aesenc xmm0, [rcx+32]
          aesenc xmm0, [rcx+48]
          aesenc xmm0, [rcx+64]
          aesenc xmm0, [rcx+80]
          aesenc xmm0, [rcx+96]
          aesenc xmm0, [rcx+112]
          aesenc xmm0, [rcx+128]
          aesenc xmm0, [rcx+144]
          ; movdqa does not touch flags, so the load may sit between cmp and jl.
          cmp edx, 11
          movdqa xmm1, OWORD PTR [rcx+160]
          jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last
          aesenc xmm0, xmm1
          aesenc xmm0, [rcx+176]
          cmp edx, 13
          movdqa xmm1, OWORD PTR [rcx+192]
          jl L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last
          aesenc xmm0, xmm1
          aesenc xmm0, [rcx+208]
          movdqa xmm1, OWORD PTR [rcx+224]
  L_AES_GCM_encrypt_block_aesni_aesenc_block_aesenc_avx_last:
          aesenclast xmm0, xmm1
          movdqu xmm1, [r9]
          pxor xmm0, xmm1
          movdqu [r8], xmm0
          ; Leaves the shuffled output block in xmm0 (the previous revision left
          ; it in xmm8).  No memory output depends on it.
          ; NOTE(review): looks vestigial - confirm before removing.
          pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
          ret
  AES_GCM_encrypt_block_aesni ENDP
  3809. _text ENDS
  3810. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------------
  ; AES_GCM_ghash_block_aesni
  ;
  ; Folds a single 16-byte block into a GHASH accumulator:
  ;     acc = (acc xor pshufb(block, L_aes_gcm_bswap_mask)) * H
  ;
  ; ABI:  Microsoft x64.
  ; In:   rcx -> input block (unaligned load)
  ;       rdx -> accumulator, 16-byte aligned (movdqa), updated in place
  ;       r8  -> H, 16-byte aligned (movdqa), read only
  ; Clobbers: xmm0-xmm5, flags.
  ;
  ; Fix relative to the previous revision: xmm6 and xmm8 are nonvolatile in the
  ; Microsoft x64 convention but were overwritten without being saved.  The
  ; input block now uses xmm0 (overwritten before its next read anyway) and the
  ; low product half uses xmm5, which is free once H has been consumed by the
  ; three multiplies.  No stack frame is needed.
  ; ---------------------------------------------------------------------------
  AES_GCM_ghash_block_aesni PROC
          movdqa xmm4, OWORD PTR [rdx]            ; xmm4 = accumulator
          movdqa xmm5, OWORD PTR [r8]             ; xmm5 = H
          movdqu xmm0, [rcx]
          pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
          pxor xmm4, xmm0
          ; Karatsuba carry-less multiply of xmm4 by xmm5
          ; (pshufd imm 78 swaps the two 64-bit halves)
          pshufd xmm1, xmm4, 78
          pshufd xmm2, xmm5, 78
          movdqa xmm3, xmm5
          movdqa xmm0, xmm5
          pclmulqdq xmm3, xmm4, 17                ; high * high
          pclmulqdq xmm0, xmm4, 0                 ; low * low
          pxor xmm1, xmm4
          pxor xmm2, xmm5                         ; last read of H in xmm5
          pclmulqdq xmm1, xmm2, 0                 ; (hi^lo) * (hi^lo)
          pxor xmm1, xmm0
          pxor xmm1, xmm3
          movdqa xmm2, xmm1
          movdqa xmm5, xmm0
          movdqa xmm4, xmm3
          pslldq xmm2, 8
          psrldq xmm1, 8
          pxor xmm5, xmm2                         ; xmm5 = low 128 bits of product
          pxor xmm4, xmm1                         ; xmm4 = high 128 bits of product
          ; Shift the 256-bit product xmm4:xmm5 left by one bit
          movdqa xmm0, xmm5
          movdqa xmm1, xmm4
          psrld xmm0, 31
          psrld xmm1, 31
          pslld xmm5, 1
          pslld xmm4, 1
          movdqa xmm2, xmm0
          pslldq xmm0, 4
          psrldq xmm2, 12
          pslldq xmm1, 4
          por xmm4, xmm2
          por xmm5, xmm0
          por xmm4, xmm1
          ; Fold the low half into the high half (shift sets 31/30/25 and 1/2/7)
          movdqa xmm0, xmm5
          movdqa xmm1, xmm5
          movdqa xmm2, xmm5
          pslld xmm0, 31
          pslld xmm1, 30
          pslld xmm2, 25
          pxor xmm0, xmm1
          pxor xmm0, xmm2
          movdqa xmm1, xmm0
          psrldq xmm1, 4
          pslldq xmm0, 12
          pxor xmm5, xmm0
          movdqa xmm2, xmm5
          movdqa xmm3, xmm5
          movdqa xmm0, xmm5
          psrld xmm2, 1
          psrld xmm3, 2
          psrld xmm0, 7
          pxor xmm2, xmm3
          pxor xmm2, xmm0
          pxor xmm2, xmm1
          pxor xmm2, xmm5
          pxor xmm4, xmm2
          movdqa OWORD PTR [rdx], xmm4            ; write the accumulator back
          ret
  AES_GCM_ghash_block_aesni ENDP
  3874. _text ENDS
  3875. _text SEGMENT READONLY PARA
  3876. AES_GCM_encrypt_update_aesni PROC
  3877. push r13
  3878. push r12
  3879. push r14
  3880. push r15
  3881. push rdi
  3882. mov rax, rcx
  3883. mov r10, r8
  3884. mov r8d, edx
  3885. mov r11, r9
  3886. mov r9d, DWORD PTR [rsp+80]
  3887. mov r12, QWORD PTR [rsp+88]
  3888. mov r14, QWORD PTR [rsp+96]
  3889. mov r15, QWORD PTR [rsp+104]
  3890. sub rsp, 160
  3891. movdqa xmm6, OWORD PTR [r12]
  3892. movdqa xmm5, OWORD PTR [r14]
  3893. movdqa xmm9, xmm5
  3894. movdqa xmm8, xmm5
  3895. psrlq xmm9, 63
  3896. psllq xmm8, 1
  3897. pslldq xmm9, 8
  3898. por xmm8, xmm9
  3899. pshufd xmm5, xmm5, 255
  3900. psrad xmm5, 31
  3901. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  3902. pxor xmm5, xmm8
  3903. xor rdi, rdi
  3904. cmp r9d, 128
  3905. mov r13d, r9d
  3906. jl L_AES_GCM_encrypt_update_aesni_done_128
  3907. and r13d, 4294967168
  3908. movdqa xmm2, xmm6
  3909. ; H ^ 1
  3910. movdqu [rsp], xmm5
  3911. ; H ^ 2
  3912. pshufd xmm9, xmm5, 78
  3913. pshufd xmm10, xmm5, 78
  3914. movdqa xmm11, xmm5
  3915. movdqa xmm8, xmm5
  3916. pclmulqdq xmm11, xmm5, 17
  3917. pclmulqdq xmm8, xmm5, 0
  3918. pxor xmm9, xmm5
  3919. pxor xmm10, xmm5
  3920. pclmulqdq xmm9, xmm10, 0
  3921. pxor xmm9, xmm8
  3922. pxor xmm9, xmm11
  3923. movdqa xmm10, xmm9
  3924. movdqa xmm0, xmm11
  3925. pslldq xmm10, 8
  3926. psrldq xmm9, 8
  3927. pxor xmm8, xmm10
  3928. pxor xmm0, xmm9
  3929. movdqa xmm12, xmm8
  3930. movdqa xmm13, xmm8
  3931. movdqa xmm14, xmm8
  3932. pslld xmm12, 31
  3933. pslld xmm13, 30
  3934. pslld xmm14, 25
  3935. pxor xmm12, xmm13
  3936. pxor xmm12, xmm14
  3937. movdqa xmm13, xmm12
  3938. psrldq xmm13, 4
  3939. pslldq xmm12, 12
  3940. pxor xmm8, xmm12
  3941. movdqa xmm14, xmm8
  3942. movdqa xmm10, xmm8
  3943. movdqa xmm9, xmm8
  3944. psrld xmm14, 1
  3945. psrld xmm10, 2
  3946. psrld xmm9, 7
  3947. pxor xmm14, xmm10
  3948. pxor xmm14, xmm9
  3949. pxor xmm14, xmm13
  3950. pxor xmm14, xmm8
  3951. pxor xmm0, xmm14
  3952. movdqu [rsp+16], xmm0
  3953. ; H ^ 3
  3954. pshufd xmm9, xmm5, 78
  3955. pshufd xmm10, xmm0, 78
  3956. movdqa xmm11, xmm0
  3957. movdqa xmm8, xmm0
  3958. pclmulqdq xmm11, xmm5, 17
  3959. pclmulqdq xmm8, xmm5, 0
  3960. pxor xmm9, xmm5
  3961. pxor xmm10, xmm0
  3962. pclmulqdq xmm9, xmm10, 0
  3963. pxor xmm9, xmm8
  3964. pxor xmm9, xmm11
  3965. movdqa xmm10, xmm9
  3966. movdqa xmm1, xmm11
  3967. pslldq xmm10, 8
  3968. psrldq xmm9, 8
  3969. pxor xmm8, xmm10
  3970. pxor xmm1, xmm9
  3971. movdqa xmm12, xmm8
  3972. movdqa xmm13, xmm8
  3973. movdqa xmm14, xmm8
  3974. pslld xmm12, 31
  3975. pslld xmm13, 30
  3976. pslld xmm14, 25
  3977. pxor xmm12, xmm13
  3978. pxor xmm12, xmm14
  3979. movdqa xmm13, xmm12
  3980. psrldq xmm13, 4
  3981. pslldq xmm12, 12
  3982. pxor xmm8, xmm12
  3983. movdqa xmm14, xmm8
  3984. movdqa xmm10, xmm8
  3985. movdqa xmm9, xmm8
  3986. psrld xmm14, 1
  3987. psrld xmm10, 2
  3988. psrld xmm9, 7
  3989. pxor xmm14, xmm10
  3990. pxor xmm14, xmm9
  3991. pxor xmm14, xmm13
  3992. pxor xmm14, xmm8
  3993. pxor xmm1, xmm14
  3994. movdqu [rsp+32], xmm1
  3995. ; H ^ 4
  3996. pshufd xmm9, xmm0, 78
  3997. pshufd xmm10, xmm0, 78
  3998. movdqa xmm11, xmm0
  3999. movdqa xmm8, xmm0
  4000. pclmulqdq xmm11, xmm0, 17
  4001. pclmulqdq xmm8, xmm0, 0
  4002. pxor xmm9, xmm0
  4003. pxor xmm10, xmm0
  4004. pclmulqdq xmm9, xmm10, 0
  4005. pxor xmm9, xmm8
  4006. pxor xmm9, xmm11
  4007. movdqa xmm10, xmm9
  4008. movdqa xmm3, xmm11
  4009. pslldq xmm10, 8
  4010. psrldq xmm9, 8
  4011. pxor xmm8, xmm10
  4012. pxor xmm3, xmm9
  4013. movdqa xmm12, xmm8
  4014. movdqa xmm13, xmm8
  4015. movdqa xmm14, xmm8
  4016. pslld xmm12, 31
  4017. pslld xmm13, 30
  4018. pslld xmm14, 25
  4019. pxor xmm12, xmm13
  4020. pxor xmm12, xmm14
  4021. movdqa xmm13, xmm12
  4022. psrldq xmm13, 4
  4023. pslldq xmm12, 12
  4024. pxor xmm8, xmm12
  4025. movdqa xmm14, xmm8
  4026. movdqa xmm10, xmm8
  4027. movdqa xmm9, xmm8
  4028. psrld xmm14, 1
  4029. psrld xmm10, 2
  4030. psrld xmm9, 7
  4031. pxor xmm14, xmm10
  4032. pxor xmm14, xmm9
  4033. pxor xmm14, xmm13
  4034. pxor xmm14, xmm8
  4035. pxor xmm3, xmm14
  4036. movdqu [rsp+48], xmm3
  4037. ; H ^ 5
  4038. pshufd xmm9, xmm0, 78
  4039. pshufd xmm10, xmm1, 78
  4040. movdqa xmm11, xmm1
  4041. movdqa xmm8, xmm1
  4042. pclmulqdq xmm11, xmm0, 17
  4043. pclmulqdq xmm8, xmm0, 0
  4044. pxor xmm9, xmm0
  4045. pxor xmm10, xmm1
  4046. pclmulqdq xmm9, xmm10, 0
  4047. pxor xmm9, xmm8
  4048. pxor xmm9, xmm11
  4049. movdqa xmm10, xmm9
  4050. movdqa xmm7, xmm11
  4051. pslldq xmm10, 8
  4052. psrldq xmm9, 8
  4053. pxor xmm8, xmm10
  4054. pxor xmm7, xmm9
  4055. movdqa xmm12, xmm8
  4056. movdqa xmm13, xmm8
  4057. movdqa xmm14, xmm8
  4058. pslld xmm12, 31
  4059. pslld xmm13, 30
  4060. pslld xmm14, 25
  4061. pxor xmm12, xmm13
  4062. pxor xmm12, xmm14
  4063. movdqa xmm13, xmm12
  4064. psrldq xmm13, 4
  4065. pslldq xmm12, 12
  4066. pxor xmm8, xmm12
  4067. movdqa xmm14, xmm8
  4068. movdqa xmm10, xmm8
  4069. movdqa xmm9, xmm8
  4070. psrld xmm14, 1
  4071. psrld xmm10, 2
  4072. psrld xmm9, 7
  4073. pxor xmm14, xmm10
  4074. pxor xmm14, xmm9
  4075. pxor xmm14, xmm13
  4076. pxor xmm14, xmm8
  4077. pxor xmm7, xmm14
  4078. movdqu [rsp+64], xmm7
  4079. ; H ^ 6
  4080. pshufd xmm9, xmm1, 78
  4081. pshufd xmm10, xmm1, 78
  4082. movdqa xmm11, xmm1
  4083. movdqa xmm8, xmm1
  4084. pclmulqdq xmm11, xmm1, 17
  4085. pclmulqdq xmm8, xmm1, 0
  4086. pxor xmm9, xmm1
  4087. pxor xmm10, xmm1
  4088. pclmulqdq xmm9, xmm10, 0
  4089. pxor xmm9, xmm8
  4090. pxor xmm9, xmm11
  4091. movdqa xmm10, xmm9
  4092. movdqa xmm7, xmm11
  4093. pslldq xmm10, 8
  4094. psrldq xmm9, 8
  4095. pxor xmm8, xmm10
  4096. pxor xmm7, xmm9
  4097. movdqa xmm12, xmm8
  4098. movdqa xmm13, xmm8
  4099. movdqa xmm14, xmm8
  4100. pslld xmm12, 31
  4101. pslld xmm13, 30
  4102. pslld xmm14, 25
  4103. pxor xmm12, xmm13
  4104. pxor xmm12, xmm14
  4105. movdqa xmm13, xmm12
  4106. psrldq xmm13, 4
  4107. pslldq xmm12, 12
  4108. pxor xmm8, xmm12
  4109. movdqa xmm14, xmm8
  4110. movdqa xmm10, xmm8
  4111. movdqa xmm9, xmm8
  4112. psrld xmm14, 1
  4113. psrld xmm10, 2
  4114. psrld xmm9, 7
  4115. pxor xmm14, xmm10
  4116. pxor xmm14, xmm9
  4117. pxor xmm14, xmm13
  4118. pxor xmm14, xmm8
  4119. pxor xmm7, xmm14
  4120. movdqu [rsp+80], xmm7
  4121. ; H ^ 7
  4122. pshufd xmm9, xmm1, 78
  4123. pshufd xmm10, xmm3, 78
  4124. movdqa xmm11, xmm3
  4125. movdqa xmm8, xmm3
  4126. pclmulqdq xmm11, xmm1, 17
  4127. pclmulqdq xmm8, xmm1, 0
  4128. pxor xmm9, xmm1
  4129. pxor xmm10, xmm3
  4130. pclmulqdq xmm9, xmm10, 0
  4131. pxor xmm9, xmm8
  4132. pxor xmm9, xmm11
  4133. movdqa xmm10, xmm9
  4134. movdqa xmm7, xmm11
  4135. pslldq xmm10, 8
  4136. psrldq xmm9, 8
  4137. pxor xmm8, xmm10
  4138. pxor xmm7, xmm9
  4139. movdqa xmm12, xmm8
  4140. movdqa xmm13, xmm8
  4141. movdqa xmm14, xmm8
  4142. pslld xmm12, 31
  4143. pslld xmm13, 30
  4144. pslld xmm14, 25
  4145. pxor xmm12, xmm13
  4146. pxor xmm12, xmm14
  4147. movdqa xmm13, xmm12
  4148. psrldq xmm13, 4
  4149. pslldq xmm12, 12
  4150. pxor xmm8, xmm12
  4151. movdqa xmm14, xmm8
  4152. movdqa xmm10, xmm8
  4153. movdqa xmm9, xmm8
  4154. psrld xmm14, 1
  4155. psrld xmm10, 2
  4156. psrld xmm9, 7
  4157. pxor xmm14, xmm10
  4158. pxor xmm14, xmm9
  4159. pxor xmm14, xmm13
  4160. pxor xmm14, xmm8
  4161. pxor xmm7, xmm14
  4162. movdqu [rsp+96], xmm7
  4163. ; H ^ 8
  4164. pshufd xmm9, xmm3, 78
  4165. pshufd xmm10, xmm3, 78
  4166. movdqa xmm11, xmm3
  4167. movdqa xmm8, xmm3
  4168. pclmulqdq xmm11, xmm3, 17
  4169. pclmulqdq xmm8, xmm3, 0
  4170. pxor xmm9, xmm3
  4171. pxor xmm10, xmm3
  4172. pclmulqdq xmm9, xmm10, 0
  4173. pxor xmm9, xmm8
  4174. pxor xmm9, xmm11
  4175. movdqa xmm10, xmm9
  4176. movdqa xmm7, xmm11
  4177. pslldq xmm10, 8
  4178. psrldq xmm9, 8
  4179. pxor xmm8, xmm10
  4180. pxor xmm7, xmm9
  4181. movdqa xmm12, xmm8
  4182. movdqa xmm13, xmm8
  4183. movdqa xmm14, xmm8
  4184. pslld xmm12, 31
  4185. pslld xmm13, 30
  4186. pslld xmm14, 25
  4187. pxor xmm12, xmm13
  4188. pxor xmm12, xmm14
  4189. movdqa xmm13, xmm12
  4190. psrldq xmm13, 4
  4191. pslldq xmm12, 12
  4192. pxor xmm8, xmm12
  4193. movdqa xmm14, xmm8
  4194. movdqa xmm10, xmm8
  4195. movdqa xmm9, xmm8
  4196. psrld xmm14, 1
  4197. psrld xmm10, 2
  4198. psrld xmm9, 7
  4199. pxor xmm14, xmm10
  4200. pxor xmm14, xmm9
  4201. pxor xmm14, xmm13
  4202. pxor xmm14, xmm8
  4203. pxor xmm7, xmm14
  4204. movdqu [rsp+112], xmm7
  4205. ; First 128 bytes of input
  4206. movdqu xmm8, [r15]
  4207. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  4208. movdqa xmm0, xmm8
  4209. pshufb xmm8, xmm1
  4210. movdqa xmm9, xmm0
  4211. paddd xmm9, OWORD PTR L_aes_gcm_one
  4212. pshufb xmm9, xmm1
  4213. movdqa xmm10, xmm0
  4214. paddd xmm10, OWORD PTR L_aes_gcm_two
  4215. pshufb xmm10, xmm1
  4216. movdqa xmm11, xmm0
  4217. paddd xmm11, OWORD PTR L_aes_gcm_three
  4218. pshufb xmm11, xmm1
  4219. movdqa xmm12, xmm0
  4220. paddd xmm12, OWORD PTR L_aes_gcm_four
  4221. pshufb xmm12, xmm1
  4222. movdqa xmm13, xmm0
  4223. paddd xmm13, OWORD PTR L_aes_gcm_five
  4224. pshufb xmm13, xmm1
  4225. movdqa xmm14, xmm0
  4226. paddd xmm14, OWORD PTR L_aes_gcm_six
  4227. pshufb xmm14, xmm1
  4228. movdqa xmm15, xmm0
  4229. paddd xmm15, OWORD PTR L_aes_gcm_seven
  4230. pshufb xmm15, xmm1
  4231. paddd xmm0, OWORD PTR L_aes_gcm_eight
  4232. movdqa xmm7, OWORD PTR [rax]
  4233. movdqu [r15], xmm0
  4234. pxor xmm8, xmm7
  4235. pxor xmm9, xmm7
  4236. pxor xmm10, xmm7
  4237. pxor xmm11, xmm7
  4238. pxor xmm12, xmm7
  4239. pxor xmm13, xmm7
  4240. pxor xmm14, xmm7
  4241. pxor xmm15, xmm7
  4242. movdqa xmm7, OWORD PTR [rax+16]
  4243. aesenc xmm8, xmm7
  4244. aesenc xmm9, xmm7
  4245. aesenc xmm10, xmm7
  4246. aesenc xmm11, xmm7
  4247. aesenc xmm12, xmm7
  4248. aesenc xmm13, xmm7
  4249. aesenc xmm14, xmm7
  4250. aesenc xmm15, xmm7
  4251. movdqa xmm7, OWORD PTR [rax+32]
  4252. aesenc xmm8, xmm7
  4253. aesenc xmm9, xmm7
  4254. aesenc xmm10, xmm7
  4255. aesenc xmm11, xmm7
  4256. aesenc xmm12, xmm7
  4257. aesenc xmm13, xmm7
  4258. aesenc xmm14, xmm7
  4259. aesenc xmm15, xmm7
  4260. movdqa xmm7, OWORD PTR [rax+48]
  4261. aesenc xmm8, xmm7
  4262. aesenc xmm9, xmm7
  4263. aesenc xmm10, xmm7
  4264. aesenc xmm11, xmm7
  4265. aesenc xmm12, xmm7
  4266. aesenc xmm13, xmm7
  4267. aesenc xmm14, xmm7
  4268. aesenc xmm15, xmm7
  4269. movdqa xmm7, OWORD PTR [rax+64]
  4270. aesenc xmm8, xmm7
  4271. aesenc xmm9, xmm7
  4272. aesenc xmm10, xmm7
  4273. aesenc xmm11, xmm7
  4274. aesenc xmm12, xmm7
  4275. aesenc xmm13, xmm7
  4276. aesenc xmm14, xmm7
  4277. aesenc xmm15, xmm7
  4278. movdqa xmm7, OWORD PTR [rax+80]
  4279. aesenc xmm8, xmm7
  4280. aesenc xmm9, xmm7
  4281. aesenc xmm10, xmm7
  4282. aesenc xmm11, xmm7
  4283. aesenc xmm12, xmm7
  4284. aesenc xmm13, xmm7
  4285. aesenc xmm14, xmm7
  4286. aesenc xmm15, xmm7
  4287. movdqa xmm7, OWORD PTR [rax+96]
  4288. aesenc xmm8, xmm7
  4289. aesenc xmm9, xmm7
  4290. aesenc xmm10, xmm7
  4291. aesenc xmm11, xmm7
  4292. aesenc xmm12, xmm7
  4293. aesenc xmm13, xmm7
  4294. aesenc xmm14, xmm7
  4295. aesenc xmm15, xmm7
  4296. movdqa xmm7, OWORD PTR [rax+112]
  4297. aesenc xmm8, xmm7
  4298. aesenc xmm9, xmm7
  4299. aesenc xmm10, xmm7
  4300. aesenc xmm11, xmm7
  4301. aesenc xmm12, xmm7
  4302. aesenc xmm13, xmm7
  4303. aesenc xmm14, xmm7
  4304. aesenc xmm15, xmm7
  4305. movdqa xmm7, OWORD PTR [rax+128]
  4306. aesenc xmm8, xmm7
  4307. aesenc xmm9, xmm7
  4308. aesenc xmm10, xmm7
  4309. aesenc xmm11, xmm7
  4310. aesenc xmm12, xmm7
  4311. aesenc xmm13, xmm7
  4312. aesenc xmm14, xmm7
  4313. aesenc xmm15, xmm7
  4314. movdqa xmm7, OWORD PTR [rax+144]
  4315. aesenc xmm8, xmm7
  4316. aesenc xmm9, xmm7
  4317. aesenc xmm10, xmm7
  4318. aesenc xmm11, xmm7
  4319. aesenc xmm12, xmm7
  4320. aesenc xmm13, xmm7
  4321. aesenc xmm14, xmm7
  4322. aesenc xmm15, xmm7
  4323. cmp r8d, 11
  4324. movdqa xmm7, OWORD PTR [rax+160]
  4325. jl L_AES_GCM_encrypt_update_aesni_enc_done
  4326. aesenc xmm8, xmm7
  4327. aesenc xmm9, xmm7
  4328. aesenc xmm10, xmm7
  4329. aesenc xmm11, xmm7
  4330. aesenc xmm12, xmm7
  4331. aesenc xmm13, xmm7
  4332. aesenc xmm14, xmm7
  4333. aesenc xmm15, xmm7
  4334. movdqa xmm7, OWORD PTR [rax+176]
  4335. aesenc xmm8, xmm7
  4336. aesenc xmm9, xmm7
  4337. aesenc xmm10, xmm7
  4338. aesenc xmm11, xmm7
  4339. aesenc xmm12, xmm7
  4340. aesenc xmm13, xmm7
  4341. aesenc xmm14, xmm7
  4342. aesenc xmm15, xmm7
  4343. cmp r8d, 13
  4344. movdqa xmm7, OWORD PTR [rax+192]
  4345. jl L_AES_GCM_encrypt_update_aesni_enc_done
  4346. aesenc xmm8, xmm7
  4347. aesenc xmm9, xmm7
  4348. aesenc xmm10, xmm7
  4349. aesenc xmm11, xmm7
  4350. aesenc xmm12, xmm7
  4351. aesenc xmm13, xmm7
  4352. aesenc xmm14, xmm7
  4353. aesenc xmm15, xmm7
  4354. movdqa xmm7, OWORD PTR [rax+208]
  4355. aesenc xmm8, xmm7
  4356. aesenc xmm9, xmm7
  4357. aesenc xmm10, xmm7
  4358. aesenc xmm11, xmm7
  4359. aesenc xmm12, xmm7
  4360. aesenc xmm13, xmm7
  4361. aesenc xmm14, xmm7
  4362. aesenc xmm15, xmm7
  4363. movdqa xmm7, OWORD PTR [rax+224]
  4364. L_AES_GCM_encrypt_update_aesni_enc_done:
  4365. aesenclast xmm8, xmm7
  4366. aesenclast xmm9, xmm7
  4367. movdqu xmm0, [r11]
  4368. movdqu xmm1, [r11+16]
  4369. pxor xmm8, xmm0
  4370. pxor xmm9, xmm1
  4371. movdqu [r10], xmm8
  4372. movdqu [r10+16], xmm9
  4373. aesenclast xmm10, xmm7
  4374. aesenclast xmm11, xmm7
  4375. movdqu xmm0, [r11+32]
  4376. movdqu xmm1, [r11+48]
  4377. pxor xmm10, xmm0
  4378. pxor xmm11, xmm1
  4379. movdqu [r10+32], xmm10
  4380. movdqu [r10+48], xmm11
  4381. aesenclast xmm12, xmm7
  4382. aesenclast xmm13, xmm7
  4383. movdqu xmm0, [r11+64]
  4384. movdqu xmm1, [r11+80]
  4385. pxor xmm12, xmm0
  4386. pxor xmm13, xmm1
  4387. movdqu [r10+64], xmm12
  4388. movdqu [r10+80], xmm13
  4389. aesenclast xmm14, xmm7
  4390. aesenclast xmm15, xmm7
  4391. movdqu xmm0, [r11+96]
  4392. movdqu xmm1, [r11+112]
  4393. pxor xmm14, xmm0
  4394. pxor xmm15, xmm1
  4395. movdqu [r10+96], xmm14
  4396. movdqu [r10+112], xmm15
  4397. cmp r13d, 128
  4398. mov edi, 128
  4399. jle L_AES_GCM_encrypt_update_aesni_end_128
  4400. ; More 128 bytes of input
  4401. L_AES_GCM_encrypt_update_aesni_ghash_128:
  4402. lea rcx, QWORD PTR [r11+rdi]
  4403. lea rdx, QWORD PTR [r10+rdi]
  4404. movdqu xmm8, [r15]
  4405. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  4406. movdqa xmm0, xmm8
  4407. pshufb xmm8, xmm1
  4408. movdqa xmm9, xmm0
  4409. paddd xmm9, OWORD PTR L_aes_gcm_one
  4410. pshufb xmm9, xmm1
  4411. movdqa xmm10, xmm0
  4412. paddd xmm10, OWORD PTR L_aes_gcm_two
  4413. pshufb xmm10, xmm1
  4414. movdqa xmm11, xmm0
  4415. paddd xmm11, OWORD PTR L_aes_gcm_three
  4416. pshufb xmm11, xmm1
  4417. movdqa xmm12, xmm0
  4418. paddd xmm12, OWORD PTR L_aes_gcm_four
  4419. pshufb xmm12, xmm1
  4420. movdqa xmm13, xmm0
  4421. paddd xmm13, OWORD PTR L_aes_gcm_five
  4422. pshufb xmm13, xmm1
  4423. movdqa xmm14, xmm0
  4424. paddd xmm14, OWORD PTR L_aes_gcm_six
  4425. pshufb xmm14, xmm1
  4426. movdqa xmm15, xmm0
  4427. paddd xmm15, OWORD PTR L_aes_gcm_seven
  4428. pshufb xmm15, xmm1
  4429. paddd xmm0, OWORD PTR L_aes_gcm_eight
  4430. movdqa xmm7, OWORD PTR [rax]
  4431. movdqu [r15], xmm0
  4432. pxor xmm8, xmm7
  4433. pxor xmm9, xmm7
  4434. pxor xmm10, xmm7
  4435. pxor xmm11, xmm7
  4436. pxor xmm12, xmm7
  4437. pxor xmm13, xmm7
  4438. pxor xmm14, xmm7
  4439. pxor xmm15, xmm7
  4440. movdqu xmm7, [rsp+112]
  4441. movdqu xmm0, [rdx+-128]
  4442. aesenc xmm8, [rax+16]
  4443. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4444. pxor xmm0, xmm2
  4445. pshufd xmm1, xmm7, 78
  4446. pshufd xmm5, xmm0, 78
  4447. pxor xmm1, xmm7
  4448. pxor xmm5, xmm0
  4449. movdqa xmm3, xmm0
  4450. pclmulqdq xmm3, xmm7, 17
  4451. aesenc xmm9, [rax+16]
  4452. aesenc xmm10, [rax+16]
  4453. movdqa xmm2, xmm0
  4454. pclmulqdq xmm2, xmm7, 0
  4455. aesenc xmm11, [rax+16]
  4456. aesenc xmm12, [rax+16]
  4457. pclmulqdq xmm1, xmm5, 0
  4458. aesenc xmm13, [rax+16]
  4459. aesenc xmm14, [rax+16]
  4460. aesenc xmm15, [rax+16]
  4461. pxor xmm1, xmm2
  4462. pxor xmm1, xmm3
  4463. movdqu xmm7, [rsp+96]
  4464. movdqu xmm0, [rdx+-112]
  4465. pshufd xmm4, xmm7, 78
  4466. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4467. aesenc xmm8, [rax+32]
  4468. pxor xmm4, xmm7
  4469. pshufd xmm5, xmm0, 78
  4470. pxor xmm5, xmm0
  4471. movdqa xmm6, xmm0
  4472. pclmulqdq xmm6, xmm7, 17
  4473. aesenc xmm9, [rax+32]
  4474. aesenc xmm10, [rax+32]
  4475. pclmulqdq xmm7, xmm0, 0
  4476. aesenc xmm11, [rax+32]
  4477. aesenc xmm12, [rax+32]
  4478. pclmulqdq xmm4, xmm5, 0
  4479. aesenc xmm13, [rax+32]
  4480. aesenc xmm14, [rax+32]
  4481. aesenc xmm15, [rax+32]
  4482. pxor xmm1, xmm7
  4483. pxor xmm2, xmm7
  4484. pxor xmm1, xmm6
  4485. pxor xmm3, xmm6
  4486. pxor xmm1, xmm4
  4487. movdqu xmm7, [rsp+80]
  4488. movdqu xmm0, [rdx+-96]
  4489. pshufd xmm4, xmm7, 78
  4490. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4491. aesenc xmm8, [rax+48]
  4492. pxor xmm4, xmm7
  4493. pshufd xmm5, xmm0, 78
  4494. pxor xmm5, xmm0
  4495. movdqa xmm6, xmm0
  4496. pclmulqdq xmm6, xmm7, 17
  4497. aesenc xmm9, [rax+48]
  4498. aesenc xmm10, [rax+48]
  4499. pclmulqdq xmm7, xmm0, 0
  4500. aesenc xmm11, [rax+48]
  4501. aesenc xmm12, [rax+48]
  4502. pclmulqdq xmm4, xmm5, 0
  4503. aesenc xmm13, [rax+48]
  4504. aesenc xmm14, [rax+48]
  4505. aesenc xmm15, [rax+48]
  4506. pxor xmm1, xmm7
  4507. pxor xmm2, xmm7
  4508. pxor xmm1, xmm6
  4509. pxor xmm3, xmm6
  4510. pxor xmm1, xmm4
  4511. movdqu xmm7, [rsp+64]
  4512. movdqu xmm0, [rdx+-80]
  4513. pshufd xmm4, xmm7, 78
  4514. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4515. aesenc xmm8, [rax+64]
  4516. pxor xmm4, xmm7
  4517. pshufd xmm5, xmm0, 78
  4518. pxor xmm5, xmm0
  4519. movdqa xmm6, xmm0
  4520. pclmulqdq xmm6, xmm7, 17
  4521. aesenc xmm9, [rax+64]
  4522. aesenc xmm10, [rax+64]
  4523. pclmulqdq xmm7, xmm0, 0
  4524. aesenc xmm11, [rax+64]
  4525. aesenc xmm12, [rax+64]
  4526. pclmulqdq xmm4, xmm5, 0
  4527. aesenc xmm13, [rax+64]
  4528. aesenc xmm14, [rax+64]
  4529. aesenc xmm15, [rax+64]
  4530. pxor xmm1, xmm7
  4531. pxor xmm2, xmm7
  4532. pxor xmm1, xmm6
  4533. pxor xmm3, xmm6
  4534. pxor xmm1, xmm4
  4535. movdqu xmm7, [rsp+48]
  4536. movdqu xmm0, [rdx+-64]
  4537. pshufd xmm4, xmm7, 78
  4538. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4539. aesenc xmm8, [rax+80]
  4540. pxor xmm4, xmm7
  4541. pshufd xmm5, xmm0, 78
  4542. pxor xmm5, xmm0
  4543. movdqa xmm6, xmm0
  4544. pclmulqdq xmm6, xmm7, 17
  4545. aesenc xmm9, [rax+80]
  4546. aesenc xmm10, [rax+80]
  4547. pclmulqdq xmm7, xmm0, 0
  4548. aesenc xmm11, [rax+80]
  4549. aesenc xmm12, [rax+80]
  4550. pclmulqdq xmm4, xmm5, 0
  4551. aesenc xmm13, [rax+80]
  4552. aesenc xmm14, [rax+80]
  4553. aesenc xmm15, [rax+80]
  4554. pxor xmm1, xmm7
  4555. pxor xmm2, xmm7
  4556. pxor xmm1, xmm6
  4557. pxor xmm3, xmm6
  4558. pxor xmm1, xmm4
  4559. movdqu xmm7, [rsp+32]
  4560. movdqu xmm0, [rdx+-48]
  4561. pshufd xmm4, xmm7, 78
  4562. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4563. aesenc xmm8, [rax+96]
  4564. pxor xmm4, xmm7
  4565. pshufd xmm5, xmm0, 78
  4566. pxor xmm5, xmm0
  4567. movdqa xmm6, xmm0
  4568. pclmulqdq xmm6, xmm7, 17
  4569. aesenc xmm9, [rax+96]
  4570. aesenc xmm10, [rax+96]
  4571. pclmulqdq xmm7, xmm0, 0
  4572. aesenc xmm11, [rax+96]
  4573. aesenc xmm12, [rax+96]
  4574. pclmulqdq xmm4, xmm5, 0
  4575. aesenc xmm13, [rax+96]
  4576. aesenc xmm14, [rax+96]
  4577. aesenc xmm15, [rax+96]
  4578. pxor xmm1, xmm7
  4579. pxor xmm2, xmm7
  4580. pxor xmm1, xmm6
  4581. pxor xmm3, xmm6
  4582. pxor xmm1, xmm4
  4583. movdqu xmm7, [rsp+16]
  4584. movdqu xmm0, [rdx+-32]
  4585. pshufd xmm4, xmm7, 78
  4586. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4587. aesenc xmm8, [rax+112]
  4588. pxor xmm4, xmm7
  4589. pshufd xmm5, xmm0, 78
  4590. pxor xmm5, xmm0
  4591. movdqa xmm6, xmm0
  4592. pclmulqdq xmm6, xmm7, 17
  4593. aesenc xmm9, [rax+112]
  4594. aesenc xmm10, [rax+112]
  4595. pclmulqdq xmm7, xmm0, 0
  4596. aesenc xmm11, [rax+112]
  4597. aesenc xmm12, [rax+112]
  4598. pclmulqdq xmm4, xmm5, 0
  4599. aesenc xmm13, [rax+112]
  4600. aesenc xmm14, [rax+112]
  4601. aesenc xmm15, [rax+112]
  4602. pxor xmm1, xmm7
  4603. pxor xmm2, xmm7
  4604. pxor xmm1, xmm6
  4605. pxor xmm3, xmm6
  4606. pxor xmm1, xmm4
  4607. movdqu xmm7, [rsp]
  4608. movdqu xmm0, [rdx+-16]
  4609. pshufd xmm4, xmm7, 78
  4610. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  4611. aesenc xmm8, [rax+128]
  4612. pxor xmm4, xmm7
  4613. pshufd xmm5, xmm0, 78
  4614. pxor xmm5, xmm0
  4615. movdqa xmm6, xmm0
  4616. pclmulqdq xmm6, xmm7, 17
  4617. aesenc xmm9, [rax+128]
  4618. aesenc xmm10, [rax+128]
  4619. pclmulqdq xmm7, xmm0, 0
  4620. aesenc xmm11, [rax+128]
  4621. aesenc xmm12, [rax+128]
  4622. pclmulqdq xmm4, xmm5, 0
  4623. aesenc xmm13, [rax+128]
  4624. aesenc xmm14, [rax+128]
  4625. aesenc xmm15, [rax+128]
  4626. pxor xmm1, xmm7
  4627. pxor xmm2, xmm7
  4628. pxor xmm1, xmm6
  4629. pxor xmm3, xmm6
  4630. pxor xmm1, xmm4
  4631. movdqa xmm5, xmm1
  4632. psrldq xmm1, 8
  4633. pslldq xmm5, 8
  4634. aesenc xmm8, [rax+144]
  4635. pxor xmm2, xmm5
  4636. pxor xmm3, xmm1
  4637. movdqa xmm7, xmm2
  4638. movdqa xmm4, xmm2
  4639. movdqa xmm5, xmm2
  4640. aesenc xmm9, [rax+144]
  4641. pslld xmm7, 31
  4642. pslld xmm4, 30
  4643. pslld xmm5, 25
  4644. aesenc xmm10, [rax+144]
  4645. pxor xmm7, xmm4
  4646. pxor xmm7, xmm5
  4647. aesenc xmm11, [rax+144]
  4648. movdqa xmm4, xmm7
  4649. pslldq xmm7, 12
  4650. psrldq xmm4, 4
  4651. aesenc xmm12, [rax+144]
  4652. pxor xmm2, xmm7
  4653. movdqa xmm5, xmm2
  4654. movdqa xmm1, xmm2
  4655. movdqa xmm0, xmm2
  4656. aesenc xmm13, [rax+144]
  4657. psrld xmm5, 1
  4658. psrld xmm1, 2
  4659. psrld xmm0, 7
  4660. aesenc xmm14, [rax+144]
  4661. pxor xmm5, xmm1
  4662. pxor xmm5, xmm0
  4663. aesenc xmm15, [rax+144]
  4664. pxor xmm5, xmm4
  4665. pxor xmm2, xmm5
  4666. pxor xmm2, xmm3
  4667. cmp r8d, 11
  4668. movdqa xmm7, OWORD PTR [rax+160]
  4669. jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done
  4670. aesenc xmm8, xmm7
  4671. aesenc xmm9, xmm7
  4672. aesenc xmm10, xmm7
  4673. aesenc xmm11, xmm7
  4674. aesenc xmm12, xmm7
  4675. aesenc xmm13, xmm7
  4676. aesenc xmm14, xmm7
  4677. aesenc xmm15, xmm7
  4678. movdqa xmm7, OWORD PTR [rax+176]
  4679. aesenc xmm8, xmm7
  4680. aesenc xmm9, xmm7
  4681. aesenc xmm10, xmm7
  4682. aesenc xmm11, xmm7
  4683. aesenc xmm12, xmm7
  4684. aesenc xmm13, xmm7
  4685. aesenc xmm14, xmm7
  4686. aesenc xmm15, xmm7
  4687. cmp r8d, 13
  4688. movdqa xmm7, OWORD PTR [rax+192]
  4689. jl L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done
  4690. aesenc xmm8, xmm7
  4691. aesenc xmm9, xmm7
  4692. aesenc xmm10, xmm7
  4693. aesenc xmm11, xmm7
  4694. aesenc xmm12, xmm7
  4695. aesenc xmm13, xmm7
  4696. aesenc xmm14, xmm7
  4697. aesenc xmm15, xmm7
  4698. movdqa xmm7, OWORD PTR [rax+208]
  4699. aesenc xmm8, xmm7
  4700. aesenc xmm9, xmm7
  4701. aesenc xmm10, xmm7
  4702. aesenc xmm11, xmm7
  4703. aesenc xmm12, xmm7
  4704. aesenc xmm13, xmm7
  4705. aesenc xmm14, xmm7
  4706. aesenc xmm15, xmm7
  4707. movdqa xmm7, OWORD PTR [rax+224]
  4708. L_AES_GCM_encrypt_update_aesni_aesenc_128_ghash_avx_done:
  4709. aesenclast xmm8, xmm7
  4710. aesenclast xmm9, xmm7
  4711. movdqu xmm0, [rcx]
  4712. movdqu xmm1, [rcx+16]
  4713. pxor xmm8, xmm0
  4714. pxor xmm9, xmm1
  4715. movdqu [rdx], xmm8
  4716. movdqu [rdx+16], xmm9
  4717. aesenclast xmm10, xmm7
  4718. aesenclast xmm11, xmm7
  4719. movdqu xmm0, [rcx+32]
  4720. movdqu xmm1, [rcx+48]
  4721. pxor xmm10, xmm0
  4722. pxor xmm11, xmm1
  4723. movdqu [rdx+32], xmm10
  4724. movdqu [rdx+48], xmm11
  4725. aesenclast xmm12, xmm7
  4726. aesenclast xmm13, xmm7
  4727. movdqu xmm0, [rcx+64]
  4728. movdqu xmm1, [rcx+80]
  4729. pxor xmm12, xmm0
  4730. pxor xmm13, xmm1
  4731. movdqu [rdx+64], xmm12
  4732. movdqu [rdx+80], xmm13
  4733. aesenclast xmm14, xmm7
  4734. aesenclast xmm15, xmm7
  4735. movdqu xmm0, [rcx+96]
  4736. movdqu xmm1, [rcx+112]
  4737. pxor xmm14, xmm0
  4738. pxor xmm15, xmm1
  4739. movdqu [rdx+96], xmm14
  4740. movdqu [rdx+112], xmm15
  4741. add edi, 128
  4742. cmp edi, r13d
  4743. jl L_AES_GCM_encrypt_update_aesni_ghash_128
  4744. L_AES_GCM_encrypt_update_aesni_end_128:
  4745. movdqa xmm4, OWORD PTR L_aes_gcm_bswap_mask
  4746. pshufb xmm8, xmm4
  4747. pshufb xmm9, xmm4
  4748. pshufb xmm10, xmm4
  4749. pshufb xmm11, xmm4
  4750. pxor xmm8, xmm2
  4751. pshufb xmm12, xmm4
  4752. pshufb xmm13, xmm4
  4753. pshufb xmm14, xmm4
  4754. pshufb xmm15, xmm4
  4755. movdqu xmm7, [rsp+112]
  4756. pshufd xmm1, xmm8, 78
  4757. pshufd xmm2, xmm7, 78
  4758. movdqa xmm3, xmm7
  4759. movdqa xmm0, xmm7
  4760. pclmulqdq xmm3, xmm8, 17
  4761. pclmulqdq xmm0, xmm8, 0
  4762. pxor xmm1, xmm8
  4763. pxor xmm2, xmm7
  4764. pclmulqdq xmm1, xmm2, 0
  4765. pxor xmm1, xmm0
  4766. pxor xmm1, xmm3
  4767. movdqa xmm2, xmm1
  4768. movdqa xmm4, xmm0
  4769. movdqa xmm6, xmm3
  4770. pslldq xmm2, 8
  4771. psrldq xmm1, 8
  4772. pxor xmm4, xmm2
  4773. pxor xmm6, xmm1
  4774. movdqu xmm7, [rsp+96]
  4775. pshufd xmm1, xmm9, 78
  4776. pshufd xmm2, xmm7, 78
  4777. movdqa xmm3, xmm7
  4778. movdqa xmm0, xmm7
  4779. pclmulqdq xmm3, xmm9, 17
  4780. pclmulqdq xmm0, xmm9, 0
  4781. pxor xmm1, xmm9
  4782. pxor xmm2, xmm7
  4783. pclmulqdq xmm1, xmm2, 0
  4784. pxor xmm1, xmm0
  4785. pxor xmm1, xmm3
  4786. movdqa xmm2, xmm1
  4787. pxor xmm4, xmm0
  4788. pxor xmm6, xmm3
  4789. pslldq xmm2, 8
  4790. psrldq xmm1, 8
  4791. pxor xmm4, xmm2
  4792. pxor xmm6, xmm1
  4793. movdqu xmm7, [rsp+80]
  4794. pshufd xmm1, xmm10, 78
  4795. pshufd xmm2, xmm7, 78
  4796. movdqa xmm3, xmm7
  4797. movdqa xmm0, xmm7
  4798. pclmulqdq xmm3, xmm10, 17
  4799. pclmulqdq xmm0, xmm10, 0
  4800. pxor xmm1, xmm10
  4801. pxor xmm2, xmm7
  4802. pclmulqdq xmm1, xmm2, 0
  4803. pxor xmm1, xmm0
  4804. pxor xmm1, xmm3
  4805. movdqa xmm2, xmm1
  4806. pxor xmm4, xmm0
  4807. pxor xmm6, xmm3
  4808. pslldq xmm2, 8
  4809. psrldq xmm1, 8
  4810. pxor xmm4, xmm2
  4811. pxor xmm6, xmm1
  4812. movdqu xmm7, [rsp+64]
  4813. pshufd xmm1, xmm11, 78
  4814. pshufd xmm2, xmm7, 78
  4815. movdqa xmm3, xmm7
  4816. movdqa xmm0, xmm7
  4817. pclmulqdq xmm3, xmm11, 17
  4818. pclmulqdq xmm0, xmm11, 0
  4819. pxor xmm1, xmm11
  4820. pxor xmm2, xmm7
  4821. pclmulqdq xmm1, xmm2, 0
  4822. pxor xmm1, xmm0
  4823. pxor xmm1, xmm3
  4824. movdqa xmm2, xmm1
  4825. pxor xmm4, xmm0
  4826. pxor xmm6, xmm3
  4827. pslldq xmm2, 8
  4828. psrldq xmm1, 8
  4829. pxor xmm4, xmm2
  4830. pxor xmm6, xmm1
  4831. movdqu xmm7, [rsp+48]
  4832. pshufd xmm1, xmm12, 78
  4833. pshufd xmm2, xmm7, 78
  4834. movdqa xmm3, xmm7
  4835. movdqa xmm0, xmm7
  4836. pclmulqdq xmm3, xmm12, 17
  4837. pclmulqdq xmm0, xmm12, 0
  4838. pxor xmm1, xmm12
  4839. pxor xmm2, xmm7
  4840. pclmulqdq xmm1, xmm2, 0
  4841. pxor xmm1, xmm0
  4842. pxor xmm1, xmm3
  4843. movdqa xmm2, xmm1
  4844. pxor xmm4, xmm0
  4845. pxor xmm6, xmm3
  4846. pslldq xmm2, 8
  4847. psrldq xmm1, 8
  4848. pxor xmm4, xmm2
  4849. pxor xmm6, xmm1
  4850. movdqu xmm7, [rsp+32]
  4851. pshufd xmm1, xmm13, 78
  4852. pshufd xmm2, xmm7, 78
  4853. movdqa xmm3, xmm7
  4854. movdqa xmm0, xmm7
  4855. pclmulqdq xmm3, xmm13, 17
  4856. pclmulqdq xmm0, xmm13, 0
  4857. pxor xmm1, xmm13
  4858. pxor xmm2, xmm7
  4859. pclmulqdq xmm1, xmm2, 0
  4860. pxor xmm1, xmm0
  4861. pxor xmm1, xmm3
  4862. movdqa xmm2, xmm1
  4863. pxor xmm4, xmm0
  4864. pxor xmm6, xmm3
  4865. pslldq xmm2, 8
  4866. psrldq xmm1, 8
  4867. pxor xmm4, xmm2
  4868. pxor xmm6, xmm1
  4869. movdqu xmm7, [rsp+16]
  4870. pshufd xmm1, xmm14, 78
  4871. pshufd xmm2, xmm7, 78
  4872. movdqa xmm3, xmm7
  4873. movdqa xmm0, xmm7
  4874. pclmulqdq xmm3, xmm14, 17
  4875. pclmulqdq xmm0, xmm14, 0
  4876. pxor xmm1, xmm14
  4877. pxor xmm2, xmm7
  4878. pclmulqdq xmm1, xmm2, 0
  4879. pxor xmm1, xmm0
  4880. pxor xmm1, xmm3
  4881. movdqa xmm2, xmm1
  4882. pxor xmm4, xmm0
  4883. pxor xmm6, xmm3
  4884. pslldq xmm2, 8
  4885. psrldq xmm1, 8
  4886. pxor xmm4, xmm2
  4887. pxor xmm6, xmm1
  4888. movdqu xmm7, [rsp]
  4889. pshufd xmm1, xmm15, 78
  4890. pshufd xmm2, xmm7, 78
  4891. movdqa xmm3, xmm7
  4892. movdqa xmm0, xmm7
  4893. pclmulqdq xmm3, xmm15, 17
  4894. pclmulqdq xmm0, xmm15, 0
  4895. pxor xmm1, xmm15
  4896. pxor xmm2, xmm7
  4897. pclmulqdq xmm1, xmm2, 0
  4898. pxor xmm1, xmm0
  4899. pxor xmm1, xmm3
  4900. movdqa xmm2, xmm1
  4901. pxor xmm4, xmm0
  4902. pxor xmm6, xmm3
  4903. pslldq xmm2, 8
  4904. psrldq xmm1, 8
  4905. pxor xmm4, xmm2
  4906. pxor xmm6, xmm1
  4907. movdqa xmm0, xmm4
  4908. movdqa xmm1, xmm4
  4909. movdqa xmm2, xmm4
  4910. pslld xmm0, 31
  4911. pslld xmm1, 30
  4912. pslld xmm2, 25
  4913. pxor xmm0, xmm1
  4914. pxor xmm0, xmm2
  4915. movdqa xmm1, xmm0
  4916. psrldq xmm1, 4
  4917. pslldq xmm0, 12
  4918. pxor xmm4, xmm0
  4919. movdqa xmm2, xmm4
  4920. movdqa xmm3, xmm4
  4921. movdqa xmm0, xmm4
  4922. psrld xmm2, 1
  4923. psrld xmm3, 2
  4924. psrld xmm0, 7
  4925. pxor xmm2, xmm3
  4926. pxor xmm2, xmm0
  4927. pxor xmm2, xmm1
  4928. pxor xmm2, xmm4
  4929. pxor xmm6, xmm2
  4930. movdqu xmm5, [rsp]
  4931. L_AES_GCM_encrypt_update_aesni_done_128:
  4932. mov edx, r9d
  4933. cmp edi, edx
  4934. jge L_AES_GCM_encrypt_update_aesni_done_enc
  4935. mov r13d, r9d
  4936. and r13d, 4294967280
  4937. cmp edi, r13d
  4938. jge L_AES_GCM_encrypt_update_aesni_last_block_done
  4939. lea rcx, QWORD PTR [r11+rdi]
  4940. lea rdx, QWORD PTR [r10+rdi]
  4941. movdqu xmm8, [r15]
  4942. movdqa xmm9, xmm8
  4943. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  4944. paddd xmm9, OWORD PTR L_aes_gcm_one
  4945. pxor xmm8, [rax]
  4946. movdqu [r15], xmm9
  4947. aesenc xmm8, [rax+16]
  4948. aesenc xmm8, [rax+32]
  4949. aesenc xmm8, [rax+48]
  4950. aesenc xmm8, [rax+64]
  4951. aesenc xmm8, [rax+80]
  4952. aesenc xmm8, [rax+96]
  4953. aesenc xmm8, [rax+112]
  4954. aesenc xmm8, [rax+128]
  4955. aesenc xmm8, [rax+144]
  4956. cmp r8d, 11
  4957. movdqa xmm9, OWORD PTR [rax+160]
  4958. jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last
  4959. aesenc xmm8, xmm9
  4960. aesenc xmm8, [rax+176]
  4961. cmp r8d, 13
  4962. movdqa xmm9, OWORD PTR [rax+192]
  4963. jl L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last
  4964. aesenc xmm8, xmm9
  4965. aesenc xmm8, [rax+208]
  4966. movdqa xmm9, OWORD PTR [rax+224]
  4967. L_AES_GCM_encrypt_update_aesni_aesenc_block_aesenc_avx_last:
  4968. aesenclast xmm8, xmm9
  4969. movdqu xmm9, [rcx]
  4970. pxor xmm8, xmm9
  4971. movdqu [rdx], xmm8
  4972. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  4973. pxor xmm6, xmm8
  4974. add edi, 16
  4975. cmp edi, r13d
  4976. jge L_AES_GCM_encrypt_update_aesni_last_block_ghash
  4977. L_AES_GCM_encrypt_update_aesni_last_block_start:
  4978. lea rcx, QWORD PTR [r11+rdi]
  4979. lea rdx, QWORD PTR [r10+rdi]
  4980. movdqu xmm8, [r15]
  4981. movdqa xmm9, xmm8
  4982. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  4983. paddd xmm9, OWORD PTR L_aes_gcm_one
  4984. pxor xmm8, [rax]
  4985. movdqu [r15], xmm9
  4986. movdqa xmm10, xmm6
  4987. pclmulqdq xmm10, xmm5, 16
  4988. aesenc xmm8, [rax+16]
  4989. aesenc xmm8, [rax+32]
  4990. movdqa xmm11, xmm6
  4991. pclmulqdq xmm11, xmm5, 1
  4992. aesenc xmm8, [rax+48]
  4993. aesenc xmm8, [rax+64]
  4994. movdqa xmm12, xmm6
  4995. pclmulqdq xmm12, xmm5, 0
  4996. aesenc xmm8, [rax+80]
  4997. movdqa xmm1, xmm6
  4998. pclmulqdq xmm1, xmm5, 17
  4999. aesenc xmm8, [rax+96]
  5000. pxor xmm10, xmm11
  5001. movdqa xmm2, xmm10
  5002. psrldq xmm10, 8
  5003. pslldq xmm2, 8
  5004. aesenc xmm8, [rax+112]
  5005. movdqa xmm3, xmm1
  5006. pxor xmm2, xmm12
  5007. pxor xmm3, xmm10
  5008. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  5009. movdqa xmm11, xmm2
  5010. pclmulqdq xmm11, xmm0, 16
  5011. aesenc xmm8, [rax+128]
  5012. pshufd xmm10, xmm2, 78
  5013. pxor xmm10, xmm11
  5014. movdqa xmm11, xmm10
  5015. pclmulqdq xmm11, xmm0, 16
  5016. aesenc xmm8, [rax+144]
  5017. pshufd xmm6, xmm10, 78
  5018. pxor xmm6, xmm11
  5019. pxor xmm6, xmm3
  5020. cmp r8d, 11
  5021. movdqa xmm9, OWORD PTR [rax+160]
  5022. jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last
  5023. aesenc xmm8, xmm9
  5024. aesenc xmm8, [rax+176]
  5025. cmp r8d, 13
  5026. movdqa xmm9, OWORD PTR [rax+192]
  5027. jl L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last
  5028. aesenc xmm8, xmm9
  5029. aesenc xmm8, [rax+208]
  5030. movdqa xmm9, OWORD PTR [rax+224]
  5031. L_AES_GCM_encrypt_update_aesni_aesenc_gfmul_last:
  5032. aesenclast xmm8, xmm9
  5033. movdqu xmm9, [rcx]
  5034. pxor xmm8, xmm9
  5035. movdqu [rdx], xmm8
  5036. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_mask
  5037. pxor xmm6, xmm8
  5038. add edi, 16
  5039. cmp edi, r13d
  5040. jl L_AES_GCM_encrypt_update_aesni_last_block_start
  5041. L_AES_GCM_encrypt_update_aesni_last_block_ghash:
  5042. pshufd xmm9, xmm5, 78
  5043. pshufd xmm10, xmm6, 78
  5044. movdqa xmm11, xmm6
  5045. movdqa xmm8, xmm6
  5046. pclmulqdq xmm11, xmm5, 17
  5047. pclmulqdq xmm8, xmm5, 0
  5048. pxor xmm9, xmm5
  5049. pxor xmm10, xmm6
  5050. pclmulqdq xmm9, xmm10, 0
  5051. pxor xmm9, xmm8
  5052. pxor xmm9, xmm11
  5053. movdqa xmm10, xmm9
  5054. movdqa xmm6, xmm11
  5055. pslldq xmm10, 8
  5056. psrldq xmm9, 8
  5057. pxor xmm8, xmm10
  5058. pxor xmm6, xmm9
  5059. movdqa xmm12, xmm8
  5060. movdqa xmm13, xmm8
  5061. movdqa xmm14, xmm8
  5062. pslld xmm12, 31
  5063. pslld xmm13, 30
  5064. pslld xmm14, 25
  5065. pxor xmm12, xmm13
  5066. pxor xmm12, xmm14
  5067. movdqa xmm13, xmm12
  5068. psrldq xmm13, 4
  5069. pslldq xmm12, 12
  5070. pxor xmm8, xmm12
  5071. movdqa xmm14, xmm8
  5072. movdqa xmm10, xmm8
  5073. movdqa xmm9, xmm8
  5074. psrld xmm14, 1
  5075. psrld xmm10, 2
  5076. psrld xmm9, 7
  5077. pxor xmm14, xmm10
  5078. pxor xmm14, xmm9
  5079. pxor xmm14, xmm13
  5080. pxor xmm14, xmm8
  5081. pxor xmm6, xmm14
  5082. L_AES_GCM_encrypt_update_aesni_last_block_done:
  5083. L_AES_GCM_encrypt_update_aesni_done_enc:
  5084. movdqa OWORD PTR [r12], xmm6
  5085. add rsp, 160
  5086. pop rdi
  5087. pop r15
  5088. pop r14
  5089. pop r12
  5090. pop r13
  5091. ret
  5092. AES_GCM_encrypt_update_aesni ENDP
  5093. _text ENDS
  5094. _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_encrypt_final_aesni
  ;
  ; Finishes a GHASH computation and writes the authentication tag:
  ;   1. XORs a block built from two byte lengths (converted to bit counts)
  ;      into the 128-bit state read from arg1,
  ;   2. multiplies that state by the hash key in GF(2^128) (Karatsuba
  ;      carry-less multiply followed by reduction),
  ;   3. byte-reverses the product, XORs it with the 16-byte block at arg7,
  ;   4. stores the first r8d bytes of the result at arg2.
  ;
  ; ABI:   Microsoft x64 (MASM syntax)
  ; In:    rcx      = pointer to the 16-byte hash state (loaded with movdqa,
  ;                   so it must be 16-byte aligned)
  ;        rdx      = destination buffer for the tag
  ;        r8d      = number of tag bytes to store; 0 stores nothing, 16 takes
  ;                   the single-store fast path. Must not exceed 16 because
  ;                   the byte loop reads from a 16-byte stack scratch slot.
  ;        r9d      = byte length placed in the LOW qword of the length block
  ;                   (presumably the ciphertext length - confirm with the C
  ;                   prototype)
  ;        [rsp+40] = byte length placed in the HIGH qword of the length block
  ;                   (presumably the AAD length - confirm with the C
  ;                   prototype)
  ;        [rsp+48] = pointer to the 16-byte hash key H (movdqa, aligned)
  ;        [rsp+56] = pointer to the 16-byte block XORed into the final tag
  ;                   (presumably the encrypted initial counter - confirm;
  ;                   movdqa, aligned)
  ; Out:   tag bytes written to [rdx]; no register result.
  ; Clobb: rax, rcx, rdx, r9, r10, r11, xmm0, xmm4, xmm5, flags.
  ;        r12-r14 and xmm6, xmm8-xmm14 are used but saved and restored here,
  ;        as the Microsoft x64 convention requires for non-volatile
  ;        registers.
  ; Stack: 3 pushes + 144 bytes:
  ;          [rsp+0]         16-byte scratch copy of the tag
  ;          [rsp+16..+143]  saved xmm6, xmm8, xmm9, ..., xmm14
  ;-----------------------------------------------------------------------
  AES_GCM_encrypt_final_aesni PROC
          push r13
          push r12
          push r14
          mov rax, rcx                      ; rax = state pointer
          mov r10d, r9d                     ; r10d = first byte length
          mov r9, rdx                       ; r9 = tag destination
          ; The three pushes above moved the stack arguments from
          ; +40/+48/+56 to +64/+72/+80.
          mov r11d, DWORD PTR [rsp+64]      ; r11d = second byte length
          mov r12, QWORD PTR [rsp+72]       ; r12 = hash key pointer
          mov r14, QWORD PTR [rsp+80]       ; r14 = final XOR block pointer
          sub rsp, 144
          ; Preserve every non-volatile XMM register this routine overwrites.
          movdqu [rsp+16], xmm6
          movdqu [rsp+32], xmm8
          movdqu [rsp+48], xmm9
          movdqu [rsp+64], xmm10
          movdqu [rsp+80], xmm11
          movdqu [rsp+96], xmm12
          movdqu [rsp+112], xmm13
          movdqu [rsp+128], xmm14
          movdqa xmm4, OWORD PTR [rax]      ; xmm4 = hash state
          movdqa xmm5, OWORD PTR [r12]      ; xmm5 = hash key H
          movdqa xmm6, OWORD PTR [r14]      ; xmm6 = block XORed in at the end
          ; xmm5 = H shifted left by one bit as a 128-bit value; when the bit
          ; shifted out was set, the L_aes_gcm_mod2_128 constant is XORed in.
          movdqa xmm9, xmm5
          movdqa xmm8, xmm5
          psrlq xmm9, 63                    ; top bit of each qword
          psllq xmm8, 1
          pslldq xmm9, 8                    ; carry low qword -> high qword
          por xmm8, xmm9
          pshufd xmm5, xmm5, 255            ; broadcast the top dword of H
          psrad xmm5, 31                    ; all-ones mask if top bit was set
          pand xmm5, OWORD PTR L_aes_gcm_mod2_128
          pxor xmm5, xmm8
          ; Build the length block (bit counts) and fold it into the state.
          mov edx, r10d
          mov ecx, r11d
          shl rdx, 3                        ; bytes -> bits
          shl rcx, 3
          pinsrq xmm0, rdx, 0
          pinsrq xmm0, rcx, 1
          pxor xmm4, xmm0
          ; Karatsuba carry-less multiply of xmm4 by xmm5:
          ;   xmm11 = high*high, xmm8 = low*low, xmm9 = middle term.
          pshufd xmm9, xmm5, 78
          pshufd xmm10, xmm4, 78
          movdqa xmm11, xmm4
          movdqa xmm8, xmm4
          pclmulqdq xmm11, xmm5, 17
          pclmulqdq xmm8, xmm5, 0
          pxor xmm9, xmm5
          pxor xmm10, xmm4
          pclmulqdq xmm9, xmm10, 0
          pxor xmm9, xmm8
          pxor xmm9, xmm11
          movdqa xmm10, xmm9
          movdqa xmm4, xmm11
          pslldq xmm10, 8
          psrldq xmm9, 8
          pxor xmm8, xmm10                  ; xmm8 = low 128 bits of product
          pxor xmm4, xmm9                   ; xmm4 = high 128 bits of product
          ; Reduce the 256-bit product xmm4:xmm8 back to 128 bits in xmm4.
          movdqa xmm12, xmm8
          movdqa xmm13, xmm8
          movdqa xmm14, xmm8
          pslld xmm12, 31
          pslld xmm13, 30
          pslld xmm14, 25
          pxor xmm12, xmm13
          pxor xmm12, xmm14
          movdqa xmm13, xmm12
          psrldq xmm13, 4
          pslldq xmm12, 12
          pxor xmm8, xmm12
          movdqa xmm14, xmm8
          movdqa xmm10, xmm8
          movdqa xmm9, xmm8
          psrld xmm14, 1
          psrld xmm10, 2
          psrld xmm9, 7
          pxor xmm14, xmm10
          pxor xmm14, xmm9
          pxor xmm14, xmm13
          pxor xmm14, xmm8
          pxor xmm4, xmm14
          ; Byte-reverse the hash and XOR with the block loaded from arg7.
          pshufb xmm4, OWORD PTR L_aes_gcm_bswap_mask
          movdqa xmm0, xmm6
          pxor xmm0, xmm4
          ; Store the tag: one 16-byte store, or a byte loop for short tags.
          cmp r8d, 16
          je L_AES_GCM_encrypt_final_aesni_store_tag_16
          ; The copy loop below is a do-while; without this guard a length
          ; of 0 would keep copying until the 32-bit counter wraps around.
          test r8d, r8d
          jz L_AES_GCM_encrypt_final_aesni_store_tag_done
          xor ecx, ecx
          movdqu [rsp], xmm0
  L_AES_GCM_encrypt_final_aesni_store_tag_loop:
          movzx r13d, BYTE PTR [rsp+rcx]
          mov BYTE PTR [r9+rcx], r13b
          inc ecx
          cmp ecx, r8d
          jne L_AES_GCM_encrypt_final_aesni_store_tag_loop
          jmp L_AES_GCM_encrypt_final_aesni_store_tag_done
  L_AES_GCM_encrypt_final_aesni_store_tag_16:
          movdqu [r9], xmm0
  L_AES_GCM_encrypt_final_aesni_store_tag_done:
          ; Restore the caller's non-volatile XMM registers.
          movdqu xmm6, [rsp+16]
          movdqu xmm8, [rsp+32]
          movdqu xmm9, [rsp+48]
          movdqu xmm10, [rsp+64]
          movdqu xmm11, [rsp+80]
          movdqu xmm12, [rsp+96]
          movdqu xmm13, [rsp+112]
          movdqu xmm14, [rsp+128]
          add rsp, 144
          pop r14
          pop r12
          pop r13
          ret
  AES_GCM_encrypt_final_aesni ENDP
  5189. _text ENDS
  5190. _text SEGMENT READONLY PARA
  5191. AES_GCM_decrypt_update_aesni PROC
  5192. push r13
  5193. push r12
  5194. push r14
  5195. push r15
  5196. push rdi
  5197. push rsi
  5198. mov rax, rcx
  5199. mov r10, r8
  5200. mov r8d, edx
  5201. mov r11, r9
  5202. mov r9d, DWORD PTR [rsp+88]
  5203. mov r12, QWORD PTR [rsp+96]
  5204. mov r14, QWORD PTR [rsp+104]
  5205. mov r15, QWORD PTR [rsp+112]
  5206. sub rsp, 168
  5207. movdqa xmm6, OWORD PTR [r12]
  5208. movdqa xmm5, OWORD PTR [r14]
  5209. movdqa xmm9, xmm5
  5210. movdqa xmm8, xmm5
  5211. psrlq xmm9, 63
  5212. psllq xmm8, 1
  5213. pslldq xmm9, 8
  5214. por xmm8, xmm9
  5215. pshufd xmm5, xmm5, 255
  5216. psrad xmm5, 31
  5217. pand xmm5, OWORD PTR L_aes_gcm_mod2_128
  5218. pxor xmm5, xmm8
  5219. xor edi, edi
  5220. cmp r9d, 128
  5221. mov r13d, r9d
  5222. jl L_AES_GCM_decrypt_update_aesni_done_128
  5223. and r13d, 4294967168
  5224. movdqa xmm2, xmm6
  5225. ; H ^ 1
  5226. movdqu [rsp], xmm5
  5227. ; H ^ 2
  5228. pshufd xmm9, xmm5, 78
  5229. pshufd xmm10, xmm5, 78
  5230. movdqa xmm11, xmm5
  5231. movdqa xmm8, xmm5
  5232. pclmulqdq xmm11, xmm5, 17
  5233. pclmulqdq xmm8, xmm5, 0
  5234. pxor xmm9, xmm5
  5235. pxor xmm10, xmm5
  5236. pclmulqdq xmm9, xmm10, 0
  5237. pxor xmm9, xmm8
  5238. pxor xmm9, xmm11
  5239. movdqa xmm10, xmm9
  5240. movdqa xmm0, xmm11
  5241. pslldq xmm10, 8
  5242. psrldq xmm9, 8
  5243. pxor xmm8, xmm10
  5244. pxor xmm0, xmm9
  5245. movdqa xmm12, xmm8
  5246. movdqa xmm13, xmm8
  5247. movdqa xmm14, xmm8
  5248. pslld xmm12, 31
  5249. pslld xmm13, 30
  5250. pslld xmm14, 25
  5251. pxor xmm12, xmm13
  5252. pxor xmm12, xmm14
  5253. movdqa xmm13, xmm12
  5254. psrldq xmm13, 4
  5255. pslldq xmm12, 12
  5256. pxor xmm8, xmm12
  5257. movdqa xmm14, xmm8
  5258. movdqa xmm10, xmm8
  5259. movdqa xmm9, xmm8
  5260. psrld xmm14, 1
  5261. psrld xmm10, 2
  5262. psrld xmm9, 7
  5263. pxor xmm14, xmm10
  5264. pxor xmm14, xmm9
  5265. pxor xmm14, xmm13
  5266. pxor xmm14, xmm8
  5267. pxor xmm0, xmm14
  5268. movdqu [rsp+16], xmm0
  5269. ; H ^ 3
  5270. pshufd xmm9, xmm5, 78
  5271. pshufd xmm10, xmm0, 78
  5272. movdqa xmm11, xmm0
  5273. movdqa xmm8, xmm0
  5274. pclmulqdq xmm11, xmm5, 17
  5275. pclmulqdq xmm8, xmm5, 0
  5276. pxor xmm9, xmm5
  5277. pxor xmm10, xmm0
  5278. pclmulqdq xmm9, xmm10, 0
  5279. pxor xmm9, xmm8
  5280. pxor xmm9, xmm11
  5281. movdqa xmm10, xmm9
  5282. movdqa xmm1, xmm11
  5283. pslldq xmm10, 8
  5284. psrldq xmm9, 8
  5285. pxor xmm8, xmm10
  5286. pxor xmm1, xmm9
  5287. movdqa xmm12, xmm8
  5288. movdqa xmm13, xmm8
  5289. movdqa xmm14, xmm8
  5290. pslld xmm12, 31
  5291. pslld xmm13, 30
  5292. pslld xmm14, 25
  5293. pxor xmm12, xmm13
  5294. pxor xmm12, xmm14
  5295. movdqa xmm13, xmm12
  5296. psrldq xmm13, 4
  5297. pslldq xmm12, 12
  5298. pxor xmm8, xmm12
  5299. movdqa xmm14, xmm8
  5300. movdqa xmm10, xmm8
  5301. movdqa xmm9, xmm8
  5302. psrld xmm14, 1
  5303. psrld xmm10, 2
  5304. psrld xmm9, 7
  5305. pxor xmm14, xmm10
  5306. pxor xmm14, xmm9
  5307. pxor xmm14, xmm13
  5308. pxor xmm14, xmm8
  5309. pxor xmm1, xmm14
  5310. movdqu [rsp+32], xmm1
  5311. ; H ^ 4
  5312. pshufd xmm9, xmm0, 78
  5313. pshufd xmm10, xmm0, 78
  5314. movdqa xmm11, xmm0
  5315. movdqa xmm8, xmm0
  5316. pclmulqdq xmm11, xmm0, 17
  5317. pclmulqdq xmm8, xmm0, 0
  5318. pxor xmm9, xmm0
  5319. pxor xmm10, xmm0
  5320. pclmulqdq xmm9, xmm10, 0
  5321. pxor xmm9, xmm8
  5322. pxor xmm9, xmm11
  5323. movdqa xmm10, xmm9
  5324. movdqa xmm3, xmm11
  5325. pslldq xmm10, 8
  5326. psrldq xmm9, 8
  5327. pxor xmm8, xmm10
  5328. pxor xmm3, xmm9
  5329. movdqa xmm12, xmm8
  5330. movdqa xmm13, xmm8
  5331. movdqa xmm14, xmm8
  5332. pslld xmm12, 31
  5333. pslld xmm13, 30
  5334. pslld xmm14, 25
  5335. pxor xmm12, xmm13
  5336. pxor xmm12, xmm14
  5337. movdqa xmm13, xmm12
  5338. psrldq xmm13, 4
  5339. pslldq xmm12, 12
  5340. pxor xmm8, xmm12
  5341. movdqa xmm14, xmm8
  5342. movdqa xmm10, xmm8
  5343. movdqa xmm9, xmm8
  5344. psrld xmm14, 1
  5345. psrld xmm10, 2
  5346. psrld xmm9, 7
  5347. pxor xmm14, xmm10
  5348. pxor xmm14, xmm9
  5349. pxor xmm14, xmm13
  5350. pxor xmm14, xmm8
  5351. pxor xmm3, xmm14
  5352. movdqu [rsp+48], xmm3
  5353. ; H ^ 5
  5354. pshufd xmm9, xmm0, 78
  5355. pshufd xmm10, xmm1, 78
  5356. movdqa xmm11, xmm1
  5357. movdqa xmm8, xmm1
  5358. pclmulqdq xmm11, xmm0, 17
  5359. pclmulqdq xmm8, xmm0, 0
  5360. pxor xmm9, xmm0
  5361. pxor xmm10, xmm1
  5362. pclmulqdq xmm9, xmm10, 0
  5363. pxor xmm9, xmm8
  5364. pxor xmm9, xmm11
  5365. movdqa xmm10, xmm9
  5366. movdqa xmm7, xmm11
  5367. pslldq xmm10, 8
  5368. psrldq xmm9, 8
  5369. pxor xmm8, xmm10
  5370. pxor xmm7, xmm9
  5371. movdqa xmm12, xmm8
  5372. movdqa xmm13, xmm8
  5373. movdqa xmm14, xmm8
  5374. pslld xmm12, 31
  5375. pslld xmm13, 30
  5376. pslld xmm14, 25
  5377. pxor xmm12, xmm13
  5378. pxor xmm12, xmm14
  5379. movdqa xmm13, xmm12
  5380. psrldq xmm13, 4
  5381. pslldq xmm12, 12
  5382. pxor xmm8, xmm12
  5383. movdqa xmm14, xmm8
  5384. movdqa xmm10, xmm8
  5385. movdqa xmm9, xmm8
  5386. psrld xmm14, 1
  5387. psrld xmm10, 2
  5388. psrld xmm9, 7
  5389. pxor xmm14, xmm10
  5390. pxor xmm14, xmm9
  5391. pxor xmm14, xmm13
  5392. pxor xmm14, xmm8
  5393. pxor xmm7, xmm14
  5394. movdqu [rsp+64], xmm7
  5395. ; H ^ 6
  5396. pshufd xmm9, xmm1, 78
  5397. pshufd xmm10, xmm1, 78
  5398. movdqa xmm11, xmm1
  5399. movdqa xmm8, xmm1
  5400. pclmulqdq xmm11, xmm1, 17
  5401. pclmulqdq xmm8, xmm1, 0
  5402. pxor xmm9, xmm1
  5403. pxor xmm10, xmm1
  5404. pclmulqdq xmm9, xmm10, 0
  5405. pxor xmm9, xmm8
  5406. pxor xmm9, xmm11
  5407. movdqa xmm10, xmm9
  5408. movdqa xmm7, xmm11
  5409. pslldq xmm10, 8
  5410. psrldq xmm9, 8
  5411. pxor xmm8, xmm10
  5412. pxor xmm7, xmm9
  5413. movdqa xmm12, xmm8
  5414. movdqa xmm13, xmm8
  5415. movdqa xmm14, xmm8
  5416. pslld xmm12, 31
  5417. pslld xmm13, 30
  5418. pslld xmm14, 25
  5419. pxor xmm12, xmm13
  5420. pxor xmm12, xmm14
  5421. movdqa xmm13, xmm12
  5422. psrldq xmm13, 4
  5423. pslldq xmm12, 12
  5424. pxor xmm8, xmm12
  5425. movdqa xmm14, xmm8
  5426. movdqa xmm10, xmm8
  5427. movdqa xmm9, xmm8
  5428. psrld xmm14, 1
  5429. psrld xmm10, 2
  5430. psrld xmm9, 7
  5431. pxor xmm14, xmm10
  5432. pxor xmm14, xmm9
  5433. pxor xmm14, xmm13
  5434. pxor xmm14, xmm8
  5435. pxor xmm7, xmm14
  5436. movdqu [rsp+80], xmm7
  5437. ; H ^ 7
  5438. pshufd xmm9, xmm1, 78
  5439. pshufd xmm10, xmm3, 78
  5440. movdqa xmm11, xmm3
  5441. movdqa xmm8, xmm3
  5442. pclmulqdq xmm11, xmm1, 17
  5443. pclmulqdq xmm8, xmm1, 0
  5444. pxor xmm9, xmm1
  5445. pxor xmm10, xmm3
  5446. pclmulqdq xmm9, xmm10, 0
  5447. pxor xmm9, xmm8
  5448. pxor xmm9, xmm11
  5449. movdqa xmm10, xmm9
  5450. movdqa xmm7, xmm11
  5451. pslldq xmm10, 8
  5452. psrldq xmm9, 8
  5453. pxor xmm8, xmm10
  5454. pxor xmm7, xmm9
  5455. movdqa xmm12, xmm8
  5456. movdqa xmm13, xmm8
  5457. movdqa xmm14, xmm8
  5458. pslld xmm12, 31
  5459. pslld xmm13, 30
  5460. pslld xmm14, 25
  5461. pxor xmm12, xmm13
  5462. pxor xmm12, xmm14
  5463. movdqa xmm13, xmm12
  5464. psrldq xmm13, 4
  5465. pslldq xmm12, 12
  5466. pxor xmm8, xmm12
  5467. movdqa xmm14, xmm8
  5468. movdqa xmm10, xmm8
  5469. movdqa xmm9, xmm8
  5470. psrld xmm14, 1
  5471. psrld xmm10, 2
  5472. psrld xmm9, 7
  5473. pxor xmm14, xmm10
  5474. pxor xmm14, xmm9
  5475. pxor xmm14, xmm13
  5476. pxor xmm14, xmm8
  5477. pxor xmm7, xmm14
  5478. movdqu [rsp+96], xmm7
  5479. ; H ^ 8
  5480. pshufd xmm9, xmm3, 78
  5481. pshufd xmm10, xmm3, 78
  5482. movdqa xmm11, xmm3
  5483. movdqa xmm8, xmm3
  5484. pclmulqdq xmm11, xmm3, 17
  5485. pclmulqdq xmm8, xmm3, 0
  5486. pxor xmm9, xmm3
  5487. pxor xmm10, xmm3
  5488. pclmulqdq xmm9, xmm10, 0
  5489. pxor xmm9, xmm8
  5490. pxor xmm9, xmm11
  5491. movdqa xmm10, xmm9
  5492. movdqa xmm7, xmm11
  5493. pslldq xmm10, 8
  5494. psrldq xmm9, 8
  5495. pxor xmm8, xmm10
  5496. pxor xmm7, xmm9
  5497. movdqa xmm12, xmm8
  5498. movdqa xmm13, xmm8
  5499. movdqa xmm14, xmm8
  5500. pslld xmm12, 31
  5501. pslld xmm13, 30
  5502. pslld xmm14, 25
  5503. pxor xmm12, xmm13
  5504. pxor xmm12, xmm14
  5505. movdqa xmm13, xmm12
  5506. psrldq xmm13, 4
  5507. pslldq xmm12, 12
  5508. pxor xmm8, xmm12
  5509. movdqa xmm14, xmm8
  5510. movdqa xmm10, xmm8
  5511. movdqa xmm9, xmm8
  5512. psrld xmm14, 1
  5513. psrld xmm10, 2
  5514. psrld xmm9, 7
  5515. pxor xmm14, xmm10
  5516. pxor xmm14, xmm9
  5517. pxor xmm14, xmm13
  5518. pxor xmm14, xmm8
  5519. pxor xmm7, xmm14
  5520. movdqu [rsp+112], xmm7
  5521. L_AES_GCM_decrypt_update_aesni_ghash_128:
  5522. lea rcx, QWORD PTR [r11+rdi]
  5523. lea rdx, QWORD PTR [r10+rdi]
  5524. movdqu xmm8, [r15]
  5525. movdqa xmm1, OWORD PTR L_aes_gcm_bswap_epi64
  5526. movdqa xmm0, xmm8
  5527. pshufb xmm8, xmm1
  5528. movdqa xmm9, xmm0
  5529. paddd xmm9, OWORD PTR L_aes_gcm_one
  5530. pshufb xmm9, xmm1
  5531. movdqa xmm10, xmm0
  5532. paddd xmm10, OWORD PTR L_aes_gcm_two
  5533. pshufb xmm10, xmm1
  5534. movdqa xmm11, xmm0
  5535. paddd xmm11, OWORD PTR L_aes_gcm_three
  5536. pshufb xmm11, xmm1
  5537. movdqa xmm12, xmm0
  5538. paddd xmm12, OWORD PTR L_aes_gcm_four
  5539. pshufb xmm12, xmm1
  5540. movdqa xmm13, xmm0
  5541. paddd xmm13, OWORD PTR L_aes_gcm_five
  5542. pshufb xmm13, xmm1
  5543. movdqa xmm14, xmm0
  5544. paddd xmm14, OWORD PTR L_aes_gcm_six
  5545. pshufb xmm14, xmm1
  5546. movdqa xmm15, xmm0
  5547. paddd xmm15, OWORD PTR L_aes_gcm_seven
  5548. pshufb xmm15, xmm1
  5549. paddd xmm0, OWORD PTR L_aes_gcm_eight
  5550. movdqa xmm7, OWORD PTR [rax]
  5551. movdqu [r15], xmm0
  5552. pxor xmm8, xmm7
  5553. pxor xmm9, xmm7
  5554. pxor xmm10, xmm7
  5555. pxor xmm11, xmm7
  5556. pxor xmm12, xmm7
  5557. pxor xmm13, xmm7
  5558. pxor xmm14, xmm7
  5559. pxor xmm15, xmm7
  5560. movdqu xmm7, [rsp+112]
  5561. movdqu xmm0, [rcx]
  5562. aesenc xmm8, [rax+16]
  5563. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5564. pxor xmm0, xmm2
  5565. pshufd xmm1, xmm7, 78
  5566. pshufd xmm5, xmm0, 78
  5567. pxor xmm1, xmm7
  5568. pxor xmm5, xmm0
  5569. movdqa xmm3, xmm0
  5570. pclmulqdq xmm3, xmm7, 17
  5571. aesenc xmm9, [rax+16]
  5572. aesenc xmm10, [rax+16]
  5573. movdqa xmm2, xmm0
  5574. pclmulqdq xmm2, xmm7, 0
  5575. aesenc xmm11, [rax+16]
  5576. aesenc xmm12, [rax+16]
  5577. pclmulqdq xmm1, xmm5, 0
  5578. aesenc xmm13, [rax+16]
  5579. aesenc xmm14, [rax+16]
  5580. aesenc xmm15, [rax+16]
  5581. pxor xmm1, xmm2
  5582. pxor xmm1, xmm3
  5583. movdqu xmm7, [rsp+96]
  5584. movdqu xmm0, [rcx+16]
  5585. pshufd xmm4, xmm7, 78
  5586. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5587. aesenc xmm8, [rax+32]
  5588. pxor xmm4, xmm7
  5589. pshufd xmm5, xmm0, 78
  5590. pxor xmm5, xmm0
  5591. movdqa xmm6, xmm0
  5592. pclmulqdq xmm6, xmm7, 17
  5593. aesenc xmm9, [rax+32]
  5594. aesenc xmm10, [rax+32]
  5595. pclmulqdq xmm7, xmm0, 0
  5596. aesenc xmm11, [rax+32]
  5597. aesenc xmm12, [rax+32]
  5598. pclmulqdq xmm4, xmm5, 0
  5599. aesenc xmm13, [rax+32]
  5600. aesenc xmm14, [rax+32]
  5601. aesenc xmm15, [rax+32]
  5602. pxor xmm1, xmm7
  5603. pxor xmm2, xmm7
  5604. pxor xmm1, xmm6
  5605. pxor xmm3, xmm6
  5606. pxor xmm1, xmm4
  5607. movdqu xmm7, [rsp+80]
  5608. movdqu xmm0, [rcx+32]
  5609. pshufd xmm4, xmm7, 78
  5610. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5611. aesenc xmm8, [rax+48]
  5612. pxor xmm4, xmm7
  5613. pshufd xmm5, xmm0, 78
  5614. pxor xmm5, xmm0
  5615. movdqa xmm6, xmm0
  5616. pclmulqdq xmm6, xmm7, 17
  5617. aesenc xmm9, [rax+48]
  5618. aesenc xmm10, [rax+48]
  5619. pclmulqdq xmm7, xmm0, 0
  5620. aesenc xmm11, [rax+48]
  5621. aesenc xmm12, [rax+48]
  5622. pclmulqdq xmm4, xmm5, 0
  5623. aesenc xmm13, [rax+48]
  5624. aesenc xmm14, [rax+48]
  5625. aesenc xmm15, [rax+48]
  5626. pxor xmm1, xmm7
  5627. pxor xmm2, xmm7
  5628. pxor xmm1, xmm6
  5629. pxor xmm3, xmm6
  5630. pxor xmm1, xmm4
  5631. movdqu xmm7, [rsp+64]
  5632. movdqu xmm0, [rcx+48]
  5633. pshufd xmm4, xmm7, 78
  5634. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5635. aesenc xmm8, [rax+64]
  5636. pxor xmm4, xmm7
  5637. pshufd xmm5, xmm0, 78
  5638. pxor xmm5, xmm0
  5639. movdqa xmm6, xmm0
  5640. pclmulqdq xmm6, xmm7, 17
  5641. aesenc xmm9, [rax+64]
  5642. aesenc xmm10, [rax+64]
  5643. pclmulqdq xmm7, xmm0, 0
  5644. aesenc xmm11, [rax+64]
  5645. aesenc xmm12, [rax+64]
  5646. pclmulqdq xmm4, xmm5, 0
  5647. aesenc xmm13, [rax+64]
  5648. aesenc xmm14, [rax+64]
  5649. aesenc xmm15, [rax+64]
  5650. pxor xmm1, xmm7
  5651. pxor xmm2, xmm7
  5652. pxor xmm1, xmm6
  5653. pxor xmm3, xmm6
  5654. pxor xmm1, xmm4
  5655. movdqu xmm7, [rsp+48]
  5656. movdqu xmm0, [rcx+64]
  5657. pshufd xmm4, xmm7, 78
  5658. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5659. aesenc xmm8, [rax+80]
  5660. pxor xmm4, xmm7
  5661. pshufd xmm5, xmm0, 78
  5662. pxor xmm5, xmm0
  5663. movdqa xmm6, xmm0
  5664. pclmulqdq xmm6, xmm7, 17
  5665. aesenc xmm9, [rax+80]
  5666. aesenc xmm10, [rax+80]
  5667. pclmulqdq xmm7, xmm0, 0
  5668. aesenc xmm11, [rax+80]
  5669. aesenc xmm12, [rax+80]
  5670. pclmulqdq xmm4, xmm5, 0
  5671. aesenc xmm13, [rax+80]
  5672. aesenc xmm14, [rax+80]
  5673. aesenc xmm15, [rax+80]
  5674. pxor xmm1, xmm7
  5675. pxor xmm2, xmm7
  5676. pxor xmm1, xmm6
  5677. pxor xmm3, xmm6
  5678. pxor xmm1, xmm4
  5679. movdqu xmm7, [rsp+32]
  5680. movdqu xmm0, [rcx+80]
  5681. pshufd xmm4, xmm7, 78
  5682. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5683. aesenc xmm8, [rax+96]
  5684. pxor xmm4, xmm7
  5685. pshufd xmm5, xmm0, 78
  5686. pxor xmm5, xmm0
  5687. movdqa xmm6, xmm0
  5688. pclmulqdq xmm6, xmm7, 17
  5689. aesenc xmm9, [rax+96]
  5690. aesenc xmm10, [rax+96]
  5691. pclmulqdq xmm7, xmm0, 0
  5692. aesenc xmm11, [rax+96]
  5693. aesenc xmm12, [rax+96]
  5694. pclmulqdq xmm4, xmm5, 0
  5695. aesenc xmm13, [rax+96]
  5696. aesenc xmm14, [rax+96]
  5697. aesenc xmm15, [rax+96]
  5698. pxor xmm1, xmm7
  5699. pxor xmm2, xmm7
  5700. pxor xmm1, xmm6
  5701. pxor xmm3, xmm6
  5702. pxor xmm1, xmm4
  5703. movdqu xmm7, [rsp+16]
  5704. movdqu xmm0, [rcx+96]
  5705. pshufd xmm4, xmm7, 78
  5706. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5707. aesenc xmm8, [rax+112]
  5708. pxor xmm4, xmm7
  5709. pshufd xmm5, xmm0, 78
  5710. pxor xmm5, xmm0
  5711. movdqa xmm6, xmm0
  5712. pclmulqdq xmm6, xmm7, 17
  5713. aesenc xmm9, [rax+112]
  5714. aesenc xmm10, [rax+112]
  5715. pclmulqdq xmm7, xmm0, 0
  5716. aesenc xmm11, [rax+112]
  5717. aesenc xmm12, [rax+112]
  5718. pclmulqdq xmm4, xmm5, 0
  5719. aesenc xmm13, [rax+112]
  5720. aesenc xmm14, [rax+112]
  5721. aesenc xmm15, [rax+112]
  5722. pxor xmm1, xmm7
  5723. pxor xmm2, xmm7
  5724. pxor xmm1, xmm6
  5725. pxor xmm3, xmm6
  5726. pxor xmm1, xmm4
  5727. movdqu xmm7, [rsp]
  5728. movdqu xmm0, [rcx+112]
  5729. pshufd xmm4, xmm7, 78
  5730. pshufb xmm0, OWORD PTR L_aes_gcm_bswap_mask
  5731. aesenc xmm8, [rax+128]
  5732. pxor xmm4, xmm7
  5733. pshufd xmm5, xmm0, 78
  5734. pxor xmm5, xmm0
  5735. movdqa xmm6, xmm0
  5736. pclmulqdq xmm6, xmm7, 17
  5737. aesenc xmm9, [rax+128]
  5738. aesenc xmm10, [rax+128]
  5739. pclmulqdq xmm7, xmm0, 0
  5740. aesenc xmm11, [rax+128]
  5741. aesenc xmm12, [rax+128]
  5742. pclmulqdq xmm4, xmm5, 0
  5743. aesenc xmm13, [rax+128]
  5744. aesenc xmm14, [rax+128]
  5745. aesenc xmm15, [rax+128]
  5746. pxor xmm1, xmm7
  5747. pxor xmm2, xmm7
  5748. pxor xmm1, xmm6
  5749. pxor xmm3, xmm6
  5750. pxor xmm1, xmm4
  5751. movdqa xmm5, xmm1
  5752. psrldq xmm1, 8
  5753. pslldq xmm5, 8
  5754. aesenc xmm8, [rax+144]
  5755. pxor xmm2, xmm5
  5756. pxor xmm3, xmm1
  5757. movdqa xmm7, xmm2
  5758. movdqa xmm4, xmm2
  5759. movdqa xmm5, xmm2
  5760. aesenc xmm9, [rax+144]
  5761. pslld xmm7, 31
  5762. pslld xmm4, 30
  5763. pslld xmm5, 25
  5764. aesenc xmm10, [rax+144]
  5765. pxor xmm7, xmm4
  5766. pxor xmm7, xmm5
  5767. aesenc xmm11, [rax+144]
  5768. movdqa xmm4, xmm7
  5769. pslldq xmm7, 12
  5770. psrldq xmm4, 4
  5771. aesenc xmm12, [rax+144]
  5772. pxor xmm2, xmm7
  5773. movdqa xmm5, xmm2
  5774. movdqa xmm1, xmm2
  5775. movdqa xmm0, xmm2
  5776. aesenc xmm13, [rax+144]
  5777. psrld xmm5, 1
  5778. psrld xmm1, 2
  5779. psrld xmm0, 7
  5780. aesenc xmm14, [rax+144]
  5781. pxor xmm5, xmm1
  5782. pxor xmm5, xmm0
  5783. aesenc xmm15, [rax+144]
  5784. pxor xmm5, xmm4
  5785. pxor xmm2, xmm5
  5786. pxor xmm2, xmm3
  5787. cmp r8d, 11
  5788. movdqa xmm7, OWORD PTR [rax+160]
  5789. jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done
  5790. aesenc xmm8, xmm7
  5791. aesenc xmm9, xmm7
  5792. aesenc xmm10, xmm7
  5793. aesenc xmm11, xmm7
  5794. aesenc xmm12, xmm7
  5795. aesenc xmm13, xmm7
  5796. aesenc xmm14, xmm7
  5797. aesenc xmm15, xmm7
  5798. movdqa xmm7, OWORD PTR [rax+176]
  5799. aesenc xmm8, xmm7
  5800. aesenc xmm9, xmm7
  5801. aesenc xmm10, xmm7
  5802. aesenc xmm11, xmm7
  5803. aesenc xmm12, xmm7
  5804. aesenc xmm13, xmm7
  5805. aesenc xmm14, xmm7
  5806. aesenc xmm15, xmm7
  5807. cmp r8d, 13
  5808. movdqa xmm7, OWORD PTR [rax+192]
  5809. jl L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done
  5810. aesenc xmm8, xmm7
  5811. aesenc xmm9, xmm7
  5812. aesenc xmm10, xmm7
  5813. aesenc xmm11, xmm7
  5814. aesenc xmm12, xmm7
  5815. aesenc xmm13, xmm7
  5816. aesenc xmm14, xmm7
  5817. aesenc xmm15, xmm7
  5818. movdqa xmm7, OWORD PTR [rax+208]
  5819. aesenc xmm8, xmm7
  5820. aesenc xmm9, xmm7
  5821. aesenc xmm10, xmm7
  5822. aesenc xmm11, xmm7
  5823. aesenc xmm12, xmm7
  5824. aesenc xmm13, xmm7
  5825. aesenc xmm14, xmm7
  5826. aesenc xmm15, xmm7
  5827. movdqa xmm7, OWORD PTR [rax+224]
  5828. L_AES_GCM_decrypt_update_aesni_aesenc_128_ghash_avx_done:
  5829. aesenclast xmm8, xmm7
  5830. aesenclast xmm9, xmm7
  5831. movdqu xmm0, [rcx]
  5832. movdqu xmm1, [rcx+16]
  5833. pxor xmm8, xmm0
  5834. pxor xmm9, xmm1
  5835. movdqu [rdx], xmm8
  5836. movdqu [rdx+16], xmm9
  5837. aesenclast xmm10, xmm7
  5838. aesenclast xmm11, xmm7
  5839. movdqu xmm0, [rcx+32]
  5840. movdqu xmm1, [rcx+48]
  5841. pxor xmm10, xmm0
  5842. pxor xmm11, xmm1
  5843. movdqu [rdx+32], xmm10
  5844. movdqu [rdx+48], xmm11
  5845. aesenclast xmm12, xmm7
  5846. aesenclast xmm13, xmm7
  5847. movdqu xmm0, [rcx+64]
  5848. movdqu xmm1, [rcx+80]
  5849. pxor xmm12, xmm0
  5850. pxor xmm13, xmm1
  5851. movdqu [rdx+64], xmm12
  5852. movdqu [rdx+80], xmm13
  5853. aesenclast xmm14, xmm7
  5854. aesenclast xmm15, xmm7
  5855. movdqu xmm0, [rcx+96]
  5856. movdqu xmm1, [rcx+112]
  5857. pxor xmm14, xmm0
  5858. pxor xmm15, xmm1
  5859. movdqu [rdx+96], xmm14
  5860. movdqu [rdx+112], xmm15
  5861. add edi, 128
  5862. cmp edi, r13d
  5863. jl L_AES_GCM_decrypt_update_aesni_ghash_128
  5864. movdqa xmm6, xmm2
  5865. movdqu xmm5, [rsp]
  5866. L_AES_GCM_decrypt_update_aesni_done_128:
  5867. mov edx, r9d
  5868. cmp edi, edx
  5869. jge L_AES_GCM_decrypt_update_aesni_done_dec
  5870. mov r13d, r9d
  5871. and r13d, 4294967280
  5872. cmp edi, r13d
  5873. jge L_AES_GCM_decrypt_update_aesni_last_block_done
  5874. L_AES_GCM_decrypt_update_aesni_last_block_start:
  5875. lea rcx, QWORD PTR [r11+rdi]
  5876. lea rdx, QWORD PTR [r10+rdi]
  5877. movdqu xmm1, [rcx]
  5878. movdqa xmm0, xmm5
  5879. pshufb xmm1, OWORD PTR L_aes_gcm_bswap_mask
  5880. pxor xmm1, xmm6
  5881. movdqu xmm8, [r15]
  5882. movdqa xmm9, xmm8
  5883. pshufb xmm8, OWORD PTR L_aes_gcm_bswap_epi64
  5884. paddd xmm9, OWORD PTR L_aes_gcm_one
  5885. pxor xmm8, [rax]
  5886. movdqu [r15], xmm9
  5887. movdqa xmm10, xmm1
  5888. pclmulqdq xmm10, xmm0, 16
  5889. aesenc xmm8, [rax+16]
  5890. aesenc xmm8, [rax+32]
  5891. movdqa xmm11, xmm1
  5892. pclmulqdq xmm11, xmm0, 1
  5893. aesenc xmm8, [rax+48]
  5894. aesenc xmm8, [rax+64]
  5895. movdqa xmm12, xmm1
  5896. pclmulqdq xmm12, xmm0, 0
  5897. aesenc xmm8, [rax+80]
  5898. movdqa xmm1, xmm1
  5899. pclmulqdq xmm1, xmm0, 17
  5900. aesenc xmm8, [rax+96]
  5901. pxor xmm10, xmm11
  5902. movdqa xmm2, xmm10
  5903. psrldq xmm10, 8
  5904. pslldq xmm2, 8
  5905. aesenc xmm8, [rax+112]
  5906. movdqa xmm3, xmm1
  5907. pxor xmm2, xmm12
  5908. pxor xmm3, xmm10
  5909. movdqa xmm0, OWORD PTR L_aes_gcm_mod2_128
  5910. movdqa xmm11, xmm2
  5911. pclmulqdq xmm11, xmm0, 16
  5912. aesenc xmm8, [rax+128]
  5913. pshufd xmm10, xmm2, 78
  5914. pxor xmm10, xmm11
  5915. movdqa xmm11, xmm10
  5916. pclmulqdq xmm11, xmm0, 16
  5917. aesenc xmm8, [rax+144]
  5918. pshufd xmm6, xmm10, 78
  5919. pxor xmm6, xmm11
  5920. pxor xmm6, xmm3
  5921. cmp r8d, 11
  5922. movdqa xmm9, OWORD PTR [rax+160]
  5923. jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last
  5924. aesenc xmm8, xmm9
  5925. aesenc xmm8, [rax+176]
  5926. cmp r8d, 13
  5927. movdqa xmm9, OWORD PTR [rax+192]
  5928. jl L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last
  5929. aesenc xmm8, xmm9
  5930. aesenc xmm8, [rax+208]
  5931. movdqa xmm9, OWORD PTR [rax+224]
  5932. L_AES_GCM_decrypt_update_aesni_aesenc_gfmul_last:
  5933. aesenclast xmm8, xmm9
  5934. movdqu xmm9, [rcx]
  5935. pxor xmm8, xmm9
  5936. movdqu [rdx], xmm8
  5937. add edi, 16
  5938. cmp edi, r13d
  5939. jl L_AES_GCM_decrypt_update_aesni_last_block_start
  5940. L_AES_GCM_decrypt_update_aesni_last_block_done:
  5941. L_AES_GCM_decrypt_update_aesni_done_dec:
  5942. movdqa OWORD PTR [r12], xmm6
  5943. add rsp, 168
  5944. pop rsi
  5945. pop rdi
  5946. pop r15
  5947. pop r14
  5948. pop r12
  5949. pop r13
  5950. ret
  5951. AES_GCM_decrypt_update_aesni ENDP
  5952. _text ENDS
  5953. _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_decrypt_final_aesni
  ;
  ; Finishes a GHASH computation, derives the authentication tag and
  ; compares it with a caller-supplied tag.
  ;
  ; ABI:   Microsoft x64
  ; In:    rcx       -> 16-byte running GHASH value (read with movdqa,
  ;                     so it must be 16-byte aligned)
  ;        rdx       -> tag to compare against
  ;        r8d       =  number of tag bytes to compare
  ;        r9d       =  byte count placed (x8) in the low qword of the
  ;                     length block - presumably the ciphertext length
  ;        [rsp+40]  =  byte count placed (x8) in the high qword of the
  ;                     length block - presumably the AAD length
  ;        [rsp+48]  -> 16-byte hash key H (movdqa, 16-byte aligned)
  ;        [rsp+56]  -> 16-byte block XORed into the final hash (movdqa,
  ;                     16-byte aligned) - presumably the encrypted
  ;                     initial counter
  ;        [rsp+64]  -> DWORD result: 1 when the tags match, 0 otherwise
  ;        (stack offsets are relative to rsp on entry)
  ; Out:   result written through the 8th argument; no register result
  ; Clobb: rax, rcx, rdx, r9, r10, r11, xmm0, xmm1, xmm5, flags
  ; Saved: r12-r15, rbp, xmm6, xmm8-xmm15 (non-volatile under this ABI)
  ;
  ; NOTE(review): the partial-tag loop runs until its index equals r8d,
  ; so r8d is assumed to be in 1..15 on that path - confirm callers
  ; validate the tag length.
  ;
  ; Frame after "sub rsp, 160" (rsp stays 16-byte aligned):
  ;   [rsp+0]    16-byte scratch holding the computed tag
  ;   [rsp+16]   xmm6
  ;   [rsp+32]   xmm8  ... [rsp+144] xmm15
  ;-----------------------------------------------------------------------
  AES_GCM_decrypt_final_aesni PROC
          push    r13
          push    r12
          push    r14
          push    rbp
          push    r15
          mov     rax, rcx                        ; rax -> GHASH value
          mov     r10d, r9d                       ; r10d = arg 4 byte count
          mov     r9, rdx                         ; r9  -> expected tag
          ; Stack arguments: 5 pushes (40) + return address (8) + shadow (32)
          mov     r11d, DWORD PTR [rsp+80]        ; arg 5 byte count
          mov     r12, QWORD PTR [rsp+88]         ; -> H
          mov     r14, QWORD PTR [rsp+96]         ; -> block XORed into the hash
          mov     rbp, QWORD PTR [rsp+104]        ; -> result
          sub     rsp, 160
          ; xmm6 and xmm8-xmm15 are callee-saved in the Microsoft x64 ABI.
          movdqu  OWORD PTR [rsp+16], xmm6
          movdqu  OWORD PTR [rsp+32], xmm8
          movdqu  OWORD PTR [rsp+48], xmm9
          movdqu  OWORD PTR [rsp+64], xmm10
          movdqu  OWORD PTR [rsp+80], xmm11
          movdqu  OWORD PTR [rsp+96], xmm12
          movdqu  OWORD PTR [rsp+112], xmm13
          movdqu  OWORD PTR [rsp+128], xmm14
          movdqu  OWORD PTR [rsp+144], xmm15
          movdqa  xmm6, OWORD PTR [rax]           ; xmm6  = GHASH value
          movdqa  xmm5, OWORD PTR [r12]           ; xmm5  = H
          movdqa  xmm15, OWORD PTR [r14]          ; xmm15 = block XORed in at the end
          ; xmm5 = H shifted left by one bit across 128 bits, XORed with
          ; L_aes_gcm_mod2_128 when the bit shifted out was set.
          movdqa  xmm9, xmm5
          movdqa  xmm8, xmm5
          psrlq   xmm9, 63
          psllq   xmm8, 1
          pslldq  xmm9, 8                         ; carry low qword bit 63 into high qword
          por     xmm8, xmm9
          pshufd  xmm5, xmm5, 255                 ; broadcast top dword
          psrad   xmm5, 31                        ; all ones when bit 127 was set
          pand    xmm5, OWORD PTR L_aes_gcm_mod2_128
          pxor    xmm5, xmm8
          ; Fold the two lengths, converted from bytes to bits, into the hash.
          mov     edx, r10d
          mov     ecx, r11d
          shl     rdx, 3
          shl     rcx, 3
          pinsrq  xmm0, rdx, 0
          pinsrq  xmm0, rcx, 1
          pxor    xmm6, xmm0
          ; Carry-less 128x128 multiply of xmm6 by xmm5 (three pclmulqdq,
          ; Karatsuba style): xmm8 = low half, xmm6 = high half.
          pshufd  xmm9, xmm5, 78
          pshufd  xmm10, xmm6, 78
          movdqa  xmm11, xmm6
          movdqa  xmm8, xmm6
          pclmulqdq xmm11, xmm5, 17               ; high * high
          pclmulqdq xmm8, xmm5, 0                 ; low * low
          pxor    xmm9, xmm5
          pxor    xmm10, xmm6
          pclmulqdq xmm9, xmm10, 0                ; (hi ^ lo) * (hi ^ lo)
          pxor    xmm9, xmm8
          pxor    xmm9, xmm11                     ; middle term
          movdqa  xmm10, xmm9
          movdqa  xmm6, xmm11
          pslldq  xmm10, 8
          psrldq  xmm9, 8
          pxor    xmm8, xmm10
          pxor    xmm6, xmm9
          ; Reduce the 256-bit product in xmm6:xmm8 back to 128 bits.
          movdqa  xmm12, xmm8
          movdqa  xmm13, xmm8
          movdqa  xmm14, xmm8
          pslld   xmm12, 31
          pslld   xmm13, 30
          pslld   xmm14, 25
          pxor    xmm12, xmm13
          pxor    xmm12, xmm14
          movdqa  xmm13, xmm12
          psrldq  xmm13, 4
          pslldq  xmm12, 12
          pxor    xmm8, xmm12
          movdqa  xmm14, xmm8
          movdqa  xmm10, xmm8
          movdqa  xmm9, xmm8
          psrld   xmm14, 1
          psrld   xmm10, 2
          psrld   xmm9, 7
          pxor    xmm14, xmm10
          pxor    xmm14, xmm9
          pxor    xmm14, xmm13
          pxor    xmm14, xmm8
          pxor    xmm6, xmm14
          ; Computed tag = shuffled hash XOR the block loaded from [r14].
          pshufb  xmm6, OWORD PTR L_aes_gcm_bswap_mask
          movdqa  xmm0, xmm15
          pxor    xmm0, xmm6
          cmp     r8d, 16
          je      L_AES_GCM_decrypt_final_aesni_cmp_tag_16
          ; Partial tag: OR together the byte differences so the loop does
          ; not exit early on the first mismatch.
          xor     rcx, rcx                        ; byte index
          xor     r15, r15                        ; accumulated difference
          movdqu  [rsp], xmm0                     ; computed tag -> scratch slot
  L_AES_GCM_decrypt_final_aesni_cmp_tag_loop:
          movzx   r13d, BYTE PTR [rsp+rcx]
          xor     r13b, BYTE PTR [r9+rcx]
          or      r15b, r13b
          inc     ecx
          cmp     ecx, r8d
          jne     L_AES_GCM_decrypt_final_aesni_cmp_tag_loop
          test    r15d, r15d                      ; upper bits of r15 are zero
          sete    r15b                            ; r15 = 1 when every byte matched
          jmp     L_AES_GCM_decrypt_final_aesni_cmp_tag_done
  L_AES_GCM_decrypt_final_aesni_cmp_tag_16:
          ; Full 16-byte tag: one vector compare, one mask bit per byte.
          movdqu  xmm1, [r9]
          pcmpeqb xmm0, xmm1
          pmovmskb edx, xmm0
          ; edx == 0FFFFh means all 16 bytes matched: result 1, else 0.
          xor     r15d, r15d
          cmp     edx, 65535
          sete    r15b
  L_AES_GCM_decrypt_final_aesni_cmp_tag_done:
          mov     DWORD PTR [rbp], r15d
          ; Restore the non-volatile XMM registers before returning.
          movdqu  xmm6, OWORD PTR [rsp+16]
          movdqu  xmm8, OWORD PTR [rsp+32]
          movdqu  xmm9, OWORD PTR [rsp+48]
          movdqu  xmm10, OWORD PTR [rsp+64]
          movdqu  xmm11, OWORD PTR [rsp+80]
          movdqu  xmm12, OWORD PTR [rsp+96]
          movdqu  xmm13, OWORD PTR [rsp+112]
          movdqu  xmm14, OWORD PTR [rsp+128]
          movdqu  xmm15, OWORD PTR [rsp+144]
          add     rsp, 160
          pop     r15
          pop     rbp
          pop     r14
          pop     r12
          pop     r13
          ret
  AES_GCM_decrypt_final_aesni ENDP
  6067. _text ENDS
  6068. IFDEF HAVE_INTEL_AVX1
  ; 16-byte aligned constants for the AVX1 AES-GCM routines. Each table is
  ; followed by a QWORD holding its own address (ptr_<name>). All tables
  ; live in the _DATA segment; every QWORD pair is listed low qword first.
  _DATA SEGMENT
  ; Increment values 1..8, carried in the high qword.
  ALIGN 16
  L_avx1_aes_gcm_one QWORD 0, 1
  ptr_L_avx1_aes_gcm_one QWORD L_avx1_aes_gcm_one
  ALIGN 16
  L_avx1_aes_gcm_two QWORD 0, 2
  ptr_L_avx1_aes_gcm_two QWORD L_avx1_aes_gcm_two
  ALIGN 16
  L_avx1_aes_gcm_three QWORD 0, 3
  ptr_L_avx1_aes_gcm_three QWORD L_avx1_aes_gcm_three
  ALIGN 16
  L_avx1_aes_gcm_four QWORD 0, 4
  ptr_L_avx1_aes_gcm_four QWORD L_avx1_aes_gcm_four
  ALIGN 16
  L_avx1_aes_gcm_five QWORD 0, 5
  ptr_L_avx1_aes_gcm_five QWORD L_avx1_aes_gcm_five
  ALIGN 16
  L_avx1_aes_gcm_six QWORD 0, 6
  ptr_L_avx1_aes_gcm_six QWORD L_avx1_aes_gcm_six
  ALIGN 16
  L_avx1_aes_gcm_seven QWORD 0, 7
  ptr_L_avx1_aes_gcm_seven QWORD L_avx1_aes_gcm_seven
  ALIGN 16
  L_avx1_aes_gcm_eight QWORD 0, 8
  ptr_L_avx1_aes_gcm_eight QWORD L_avx1_aes_gcm_eight
  ; Byte-shuffle control: reverses the bytes inside each 64-bit half.
  ALIGN 16
  L_avx1_aes_gcm_bswap_epi64 QWORD 00001020304050607h, 008090A0B0C0D0E0Fh
  ptr_L_avx1_aes_gcm_bswap_epi64 QWORD L_avx1_aes_gcm_bswap_epi64
  ; Byte-shuffle control: reverses all 16 bytes of the register.
  ALIGN 16
  L_avx1_aes_gcm_bswap_mask QWORD 008090A0B0C0D0E0Fh, 00001020304050607h
  ptr_L_avx1_aes_gcm_bswap_mask QWORD L_avx1_aes_gcm_bswap_mask
  ; Mask ANDed in while doubling H (low qword 1, high qword 0C2h << 56).
  ALIGN 16
  L_avx1_aes_gcm_mod2_128 QWORD 1, 0C200000000000000h
  ptr_L_avx1_aes_gcm_mod2_128 QWORD L_avx1_aes_gcm_mod2_128
  _DATA ENDS
  6124. _text SEGMENT READONLY PARA
  6125. AES_GCM_encrypt_avx1 PROC
  6126. push r13
  6127. push rdi
  6128. push rsi
  6129. push r12
  6130. push rbx
  6131. push r14
  6132. push r15
  6133. mov rdi, rcx
  6134. mov rsi, rdx
  6135. mov r12, r8
  6136. mov rax, r9
  6137. mov r8, QWORD PTR [rsp+96]
  6138. mov r9d, DWORD PTR [rsp+104]
  6139. mov r11d, DWORD PTR [rsp+112]
  6140. mov ebx, DWORD PTR [rsp+120]
  6141. mov r14d, DWORD PTR [rsp+128]
  6142. mov r15, QWORD PTR [rsp+136]
  6143. mov r10d, DWORD PTR [rsp+144]
  6144. sub rsp, 160
  6145. vpxor xmm4, xmm4, xmm4
  6146. vpxor xmm6, xmm6, xmm6
  6147. mov edx, ebx
  6148. cmp edx, 12
  6149. jne L_AES_GCM_encrypt_avx1_iv_not_12
  6150. ; # Calculate values when IV is 12 bytes
  6151. ; Set counter based on IV
  6152. mov ecx, 16777216
  6153. vmovq xmm4, QWORD PTR [rax]
  6154. vpinsrd xmm4, xmm4, DWORD PTR [rax+8], 2
  6155. vpinsrd xmm4, xmm4, ecx, 3
  6156. ; H = Encrypt X(=0) and T = Encrypt counter
  6157. vmovdqa xmm5, OWORD PTR [r15]
  6158. vpxor xmm1, xmm4, xmm5
  6159. vmovdqa xmm7, OWORD PTR [r15+16]
  6160. vaesenc xmm5, xmm5, xmm7
  6161. vaesenc xmm1, xmm1, xmm7
  6162. vmovdqa xmm7, OWORD PTR [r15+32]
  6163. vaesenc xmm5, xmm5, xmm7
  6164. vaesenc xmm1, xmm1, xmm7
  6165. vmovdqa xmm7, OWORD PTR [r15+48]
  6166. vaesenc xmm5, xmm5, xmm7
  6167. vaesenc xmm1, xmm1, xmm7
  6168. vmovdqa xmm7, OWORD PTR [r15+64]
  6169. vaesenc xmm5, xmm5, xmm7
  6170. vaesenc xmm1, xmm1, xmm7
  6171. vmovdqa xmm7, OWORD PTR [r15+80]
  6172. vaesenc xmm5, xmm5, xmm7
  6173. vaesenc xmm1, xmm1, xmm7
  6174. vmovdqa xmm7, OWORD PTR [r15+96]
  6175. vaesenc xmm5, xmm5, xmm7
  6176. vaesenc xmm1, xmm1, xmm7
  6177. vmovdqa xmm7, OWORD PTR [r15+112]
  6178. vaesenc xmm5, xmm5, xmm7
  6179. vaesenc xmm1, xmm1, xmm7
  6180. vmovdqa xmm7, OWORD PTR [r15+128]
  6181. vaesenc xmm5, xmm5, xmm7
  6182. vaesenc xmm1, xmm1, xmm7
  6183. vmovdqa xmm7, OWORD PTR [r15+144]
  6184. vaesenc xmm5, xmm5, xmm7
  6185. vaesenc xmm1, xmm1, xmm7
  6186. cmp r10d, 11
  6187. vmovdqa xmm7, OWORD PTR [r15+160]
  6188. jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
  6189. vaesenc xmm5, xmm5, xmm7
  6190. vaesenc xmm1, xmm1, xmm7
  6191. vmovdqa xmm7, OWORD PTR [r15+176]
  6192. vaesenc xmm5, xmm5, xmm7
  6193. vaesenc xmm1, xmm1, xmm7
  6194. cmp r10d, 13
  6195. vmovdqa xmm7, OWORD PTR [r15+192]
  6196. jl L_AES_GCM_encrypt_avx1_calc_iv_12_last
  6197. vaesenc xmm5, xmm5, xmm7
  6198. vaesenc xmm1, xmm1, xmm7
  6199. vmovdqa xmm7, OWORD PTR [r15+208]
  6200. vaesenc xmm5, xmm5, xmm7
  6201. vaesenc xmm1, xmm1, xmm7
  6202. vmovdqa xmm7, OWORD PTR [r15+224]
  6203. L_AES_GCM_encrypt_avx1_calc_iv_12_last:
  6204. vaesenclast xmm5, xmm5, xmm7
  6205. vaesenclast xmm1, xmm1, xmm7
  6206. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6207. vmovdqu OWORD PTR [rsp+144], xmm1
  6208. jmp L_AES_GCM_encrypt_avx1_iv_done
  6209. L_AES_GCM_encrypt_avx1_iv_not_12:
  6210. ; Calculate values when IV is not 12 bytes
  6211. ; H = Encrypt X(=0)
  6212. vmovdqa xmm5, OWORD PTR [r15]
  6213. vaesenc xmm5, xmm5, [r15+16]
  6214. vaesenc xmm5, xmm5, [r15+32]
  6215. vaesenc xmm5, xmm5, [r15+48]
  6216. vaesenc xmm5, xmm5, [r15+64]
  6217. vaesenc xmm5, xmm5, [r15+80]
  6218. vaesenc xmm5, xmm5, [r15+96]
  6219. vaesenc xmm5, xmm5, [r15+112]
  6220. vaesenc xmm5, xmm5, [r15+128]
  6221. vaesenc xmm5, xmm5, [r15+144]
  6222. cmp r10d, 11
  6223. vmovdqa xmm9, OWORD PTR [r15+160]
  6224. jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
  6225. vaesenc xmm5, xmm5, xmm9
  6226. vaesenc xmm5, xmm5, [r15+176]
  6227. cmp r10d, 13
  6228. vmovdqa xmm9, OWORD PTR [r15+192]
  6229. jl L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last
  6230. vaesenc xmm5, xmm5, xmm9
  6231. vaesenc xmm5, xmm5, [r15+208]
  6232. vmovdqa xmm9, OWORD PTR [r15+224]
  6233. L_AES_GCM_encrypt_avx1_calc_iv_1_aesenc_avx_last:
  6234. vaesenclast xmm5, xmm5, xmm9
  6235. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6236. ; Calc counter
  6237. ; Initialization vector
  6238. cmp edx, 0
  6239. mov rcx, 0
  6240. je L_AES_GCM_encrypt_avx1_calc_iv_done
  6241. cmp edx, 16
  6242. jl L_AES_GCM_encrypt_avx1_calc_iv_lt16
  6243. and edx, 4294967280
  6244. L_AES_GCM_encrypt_avx1_calc_iv_16_loop:
  6245. vmovdqu xmm8, OWORD PTR [rax+rcx]
  6246. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6247. vpxor xmm4, xmm4, xmm8
  6248. ; ghash_gfmul_avx
  6249. vpshufd xmm1, xmm4, 78
  6250. vpshufd xmm2, xmm5, 78
  6251. vpclmulqdq xmm3, xmm5, xmm4, 17
  6252. vpclmulqdq xmm0, xmm5, xmm4, 0
  6253. vpxor xmm1, xmm1, xmm4
  6254. vpxor xmm2, xmm2, xmm5
  6255. vpclmulqdq xmm1, xmm1, xmm2, 0
  6256. vpxor xmm1, xmm1, xmm0
  6257. vpxor xmm1, xmm1, xmm3
  6258. vmovdqa xmm7, xmm0
  6259. vmovdqa xmm4, xmm3
  6260. vpslldq xmm2, xmm1, 8
  6261. vpsrldq xmm1, xmm1, 8
  6262. vpxor xmm7, xmm7, xmm2
  6263. vpxor xmm4, xmm4, xmm1
  6264. vpsrld xmm0, xmm7, 31
  6265. vpsrld xmm1, xmm4, 31
  6266. vpslld xmm7, xmm7, 1
  6267. vpslld xmm4, xmm4, 1
  6268. vpsrldq xmm2, xmm0, 12
  6269. vpslldq xmm0, xmm0, 4
  6270. vpslldq xmm1, xmm1, 4
  6271. vpor xmm4, xmm4, xmm2
  6272. vpor xmm7, xmm7, xmm0
  6273. vpor xmm4, xmm4, xmm1
  6274. vpslld xmm0, xmm7, 31
  6275. vpslld xmm1, xmm7, 30
  6276. vpslld xmm2, xmm7, 25
  6277. vpxor xmm0, xmm0, xmm1
  6278. vpxor xmm0, xmm0, xmm2
  6279. vmovdqa xmm1, xmm0
  6280. vpsrldq xmm1, xmm1, 4
  6281. vpslldq xmm0, xmm0, 12
  6282. vpxor xmm7, xmm7, xmm0
  6283. vpsrld xmm2, xmm7, 1
  6284. vpsrld xmm3, xmm7, 2
  6285. vpsrld xmm0, xmm7, 7
  6286. vpxor xmm2, xmm2, xmm3
  6287. vpxor xmm2, xmm2, xmm0
  6288. vpxor xmm2, xmm2, xmm1
  6289. vpxor xmm2, xmm2, xmm7
  6290. vpxor xmm4, xmm4, xmm2
  6291. add ecx, 16
  6292. cmp ecx, edx
  6293. jl L_AES_GCM_encrypt_avx1_calc_iv_16_loop
  6294. mov edx, ebx
  6295. cmp ecx, edx
  6296. je L_AES_GCM_encrypt_avx1_calc_iv_done
  6297. L_AES_GCM_encrypt_avx1_calc_iv_lt16:
  6298. sub rsp, 16
  6299. vpxor xmm8, xmm8, xmm8
  6300. xor ebx, ebx
  6301. vmovdqu OWORD PTR [rsp], xmm8
  6302. L_AES_GCM_encrypt_avx1_calc_iv_loop:
  6303. movzx r13d, BYTE PTR [rax+rcx]
  6304. mov BYTE PTR [rsp+rbx], r13b
  6305. inc ecx
  6306. inc ebx
  6307. cmp ecx, edx
  6308. jl L_AES_GCM_encrypt_avx1_calc_iv_loop
  6309. vmovdqu xmm8, OWORD PTR [rsp]
  6310. add rsp, 16
  6311. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6312. vpxor xmm4, xmm4, xmm8
  6313. ; ghash_gfmul_avx
  6314. vpshufd xmm1, xmm4, 78
  6315. vpshufd xmm2, xmm5, 78
  6316. vpclmulqdq xmm3, xmm5, xmm4, 17
  6317. vpclmulqdq xmm0, xmm5, xmm4, 0
  6318. vpxor xmm1, xmm1, xmm4
  6319. vpxor xmm2, xmm2, xmm5
  6320. vpclmulqdq xmm1, xmm1, xmm2, 0
  6321. vpxor xmm1, xmm1, xmm0
  6322. vpxor xmm1, xmm1, xmm3
  6323. vmovdqa xmm7, xmm0
  6324. vmovdqa xmm4, xmm3
  6325. vpslldq xmm2, xmm1, 8
  6326. vpsrldq xmm1, xmm1, 8
  6327. vpxor xmm7, xmm7, xmm2
  6328. vpxor xmm4, xmm4, xmm1
  6329. vpsrld xmm0, xmm7, 31
  6330. vpsrld xmm1, xmm4, 31
  6331. vpslld xmm7, xmm7, 1
  6332. vpslld xmm4, xmm4, 1
  6333. vpsrldq xmm2, xmm0, 12
  6334. vpslldq xmm0, xmm0, 4
  6335. vpslldq xmm1, xmm1, 4
  6336. vpor xmm4, xmm4, xmm2
  6337. vpor xmm7, xmm7, xmm0
  6338. vpor xmm4, xmm4, xmm1
  6339. vpslld xmm0, xmm7, 31
  6340. vpslld xmm1, xmm7, 30
  6341. vpslld xmm2, xmm7, 25
  6342. vpxor xmm0, xmm0, xmm1
  6343. vpxor xmm0, xmm0, xmm2
  6344. vmovdqa xmm1, xmm0
  6345. vpsrldq xmm1, xmm1, 4
  6346. vpslldq xmm0, xmm0, 12
  6347. vpxor xmm7, xmm7, xmm0
  6348. vpsrld xmm2, xmm7, 1
  6349. vpsrld xmm3, xmm7, 2
  6350. vpsrld xmm0, xmm7, 7
  6351. vpxor xmm2, xmm2, xmm3
  6352. vpxor xmm2, xmm2, xmm0
  6353. vpxor xmm2, xmm2, xmm1
  6354. vpxor xmm2, xmm2, xmm7
  6355. vpxor xmm4, xmm4, xmm2
  6356. L_AES_GCM_encrypt_avx1_calc_iv_done:
  6357. ; T = Encrypt counter
  6358. vpxor xmm0, xmm0, xmm0
  6359. shl edx, 3
  6360. vmovq xmm0, rdx
  6361. vpxor xmm4, xmm4, xmm0
  6362. ; ghash_gfmul_avx
  6363. vpshufd xmm1, xmm4, 78
  6364. vpshufd xmm2, xmm5, 78
  6365. vpclmulqdq xmm3, xmm5, xmm4, 17
  6366. vpclmulqdq xmm0, xmm5, xmm4, 0
  6367. vpxor xmm1, xmm1, xmm4
  6368. vpxor xmm2, xmm2, xmm5
  6369. vpclmulqdq xmm1, xmm1, xmm2, 0
  6370. vpxor xmm1, xmm1, xmm0
  6371. vpxor xmm1, xmm1, xmm3
  6372. vmovdqa xmm7, xmm0
  6373. vmovdqa xmm4, xmm3
  6374. vpslldq xmm2, xmm1, 8
  6375. vpsrldq xmm1, xmm1, 8
  6376. vpxor xmm7, xmm7, xmm2
  6377. vpxor xmm4, xmm4, xmm1
  6378. vpsrld xmm0, xmm7, 31
  6379. vpsrld xmm1, xmm4, 31
  6380. vpslld xmm7, xmm7, 1
  6381. vpslld xmm4, xmm4, 1
  6382. vpsrldq xmm2, xmm0, 12
  6383. vpslldq xmm0, xmm0, 4
  6384. vpslldq xmm1, xmm1, 4
  6385. vpor xmm4, xmm4, xmm2
  6386. vpor xmm7, xmm7, xmm0
  6387. vpor xmm4, xmm4, xmm1
  6388. vpslld xmm0, xmm7, 31
  6389. vpslld xmm1, xmm7, 30
  6390. vpslld xmm2, xmm7, 25
  6391. vpxor xmm0, xmm0, xmm1
  6392. vpxor xmm0, xmm0, xmm2
  6393. vmovdqa xmm1, xmm0
  6394. vpsrldq xmm1, xmm1, 4
  6395. vpslldq xmm0, xmm0, 12
  6396. vpxor xmm7, xmm7, xmm0
  6397. vpsrld xmm2, xmm7, 1
  6398. vpsrld xmm3, xmm7, 2
  6399. vpsrld xmm0, xmm7, 7
  6400. vpxor xmm2, xmm2, xmm3
  6401. vpxor xmm2, xmm2, xmm0
  6402. vpxor xmm2, xmm2, xmm1
  6403. vpxor xmm2, xmm2, xmm7
  6404. vpxor xmm4, xmm4, xmm2
  6405. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6406. ; Encrypt counter
  6407. vmovdqa xmm8, OWORD PTR [r15]
  6408. vpxor xmm8, xmm8, xmm4
  6409. vaesenc xmm8, xmm8, [r15+16]
  6410. vaesenc xmm8, xmm8, [r15+32]
  6411. vaesenc xmm8, xmm8, [r15+48]
  6412. vaesenc xmm8, xmm8, [r15+64]
  6413. vaesenc xmm8, xmm8, [r15+80]
  6414. vaesenc xmm8, xmm8, [r15+96]
  6415. vaesenc xmm8, xmm8, [r15+112]
  6416. vaesenc xmm8, xmm8, [r15+128]
  6417. vaesenc xmm8, xmm8, [r15+144]
  6418. cmp r10d, 11
  6419. vmovdqa xmm9, OWORD PTR [r15+160]
  6420. jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
  6421. vaesenc xmm8, xmm8, xmm9
  6422. vaesenc xmm8, xmm8, [r15+176]
  6423. cmp r10d, 13
  6424. vmovdqa xmm9, OWORD PTR [r15+192]
  6425. jl L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last
  6426. vaesenc xmm8, xmm8, xmm9
  6427. vaesenc xmm8, xmm8, [r15+208]
  6428. vmovdqa xmm9, OWORD PTR [r15+224]
  6429. L_AES_GCM_encrypt_avx1_calc_iv_2_aesenc_avx_last:
  6430. vaesenclast xmm8, xmm8, xmm9
  6431. vmovdqu OWORD PTR [rsp+144], xmm8
  6432. L_AES_GCM_encrypt_avx1_iv_done:
  6433. ; Additional authentication data
  6434. mov edx, r11d
  6435. cmp edx, 0
  6436. je L_AES_GCM_encrypt_avx1_calc_aad_done
  6437. xor ecx, ecx
  6438. cmp edx, 16
  6439. jl L_AES_GCM_encrypt_avx1_calc_aad_lt16
  6440. and edx, 4294967280
  6441. L_AES_GCM_encrypt_avx1_calc_aad_16_loop:
  6442. vmovdqu xmm8, OWORD PTR [r12+rcx]
  6443. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6444. vpxor xmm6, xmm6, xmm8
  6445. ; ghash_gfmul_avx
  6446. vpshufd xmm1, xmm6, 78
  6447. vpshufd xmm2, xmm5, 78
  6448. vpclmulqdq xmm3, xmm5, xmm6, 17
  6449. vpclmulqdq xmm0, xmm5, xmm6, 0
  6450. vpxor xmm1, xmm1, xmm6
  6451. vpxor xmm2, xmm2, xmm5
  6452. vpclmulqdq xmm1, xmm1, xmm2, 0
  6453. vpxor xmm1, xmm1, xmm0
  6454. vpxor xmm1, xmm1, xmm3
  6455. vmovdqa xmm7, xmm0
  6456. vmovdqa xmm6, xmm3
  6457. vpslldq xmm2, xmm1, 8
  6458. vpsrldq xmm1, xmm1, 8
  6459. vpxor xmm7, xmm7, xmm2
  6460. vpxor xmm6, xmm6, xmm1
  6461. vpsrld xmm0, xmm7, 31
  6462. vpsrld xmm1, xmm6, 31
  6463. vpslld xmm7, xmm7, 1
  6464. vpslld xmm6, xmm6, 1
  6465. vpsrldq xmm2, xmm0, 12
  6466. vpslldq xmm0, xmm0, 4
  6467. vpslldq xmm1, xmm1, 4
  6468. vpor xmm6, xmm6, xmm2
  6469. vpor xmm7, xmm7, xmm0
  6470. vpor xmm6, xmm6, xmm1
  6471. vpslld xmm0, xmm7, 31
  6472. vpslld xmm1, xmm7, 30
  6473. vpslld xmm2, xmm7, 25
  6474. vpxor xmm0, xmm0, xmm1
  6475. vpxor xmm0, xmm0, xmm2
  6476. vmovdqa xmm1, xmm0
  6477. vpsrldq xmm1, xmm1, 4
  6478. vpslldq xmm0, xmm0, 12
  6479. vpxor xmm7, xmm7, xmm0
  6480. vpsrld xmm2, xmm7, 1
  6481. vpsrld xmm3, xmm7, 2
  6482. vpsrld xmm0, xmm7, 7
  6483. vpxor xmm2, xmm2, xmm3
  6484. vpxor xmm2, xmm2, xmm0
  6485. vpxor xmm2, xmm2, xmm1
  6486. vpxor xmm2, xmm2, xmm7
  6487. vpxor xmm6, xmm6, xmm2
  6488. add ecx, 16
  6489. cmp ecx, edx
  6490. jl L_AES_GCM_encrypt_avx1_calc_aad_16_loop
  6491. mov edx, r11d
  6492. cmp ecx, edx
  6493. je L_AES_GCM_encrypt_avx1_calc_aad_done
  6494. L_AES_GCM_encrypt_avx1_calc_aad_lt16:
  6495. sub rsp, 16
  6496. vpxor xmm8, xmm8, xmm8
  6497. xor ebx, ebx
  6498. vmovdqu OWORD PTR [rsp], xmm8
  6499. L_AES_GCM_encrypt_avx1_calc_aad_loop:
  6500. movzx r13d, BYTE PTR [r12+rcx]
  6501. mov BYTE PTR [rsp+rbx], r13b
  6502. inc ecx
  6503. inc ebx
  6504. cmp ecx, edx
  6505. jl L_AES_GCM_encrypt_avx1_calc_aad_loop
  6506. vmovdqu xmm8, OWORD PTR [rsp]
  6507. add rsp, 16
  6508. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6509. vpxor xmm6, xmm6, xmm8
  6510. ; ghash_gfmul_avx
  6511. vpshufd xmm1, xmm6, 78
  6512. vpshufd xmm2, xmm5, 78
  6513. vpclmulqdq xmm3, xmm5, xmm6, 17
  6514. vpclmulqdq xmm0, xmm5, xmm6, 0
  6515. vpxor xmm1, xmm1, xmm6
  6516. vpxor xmm2, xmm2, xmm5
  6517. vpclmulqdq xmm1, xmm1, xmm2, 0
  6518. vpxor xmm1, xmm1, xmm0
  6519. vpxor xmm1, xmm1, xmm3
  6520. vmovdqa xmm7, xmm0
  6521. vmovdqa xmm6, xmm3
  6522. vpslldq xmm2, xmm1, 8
  6523. vpsrldq xmm1, xmm1, 8
  6524. vpxor xmm7, xmm7, xmm2
  6525. vpxor xmm6, xmm6, xmm1
  6526. vpsrld xmm0, xmm7, 31
  6527. vpsrld xmm1, xmm6, 31
  6528. vpslld xmm7, xmm7, 1
  6529. vpslld xmm6, xmm6, 1
  6530. vpsrldq xmm2, xmm0, 12
  6531. vpslldq xmm0, xmm0, 4
  6532. vpslldq xmm1, xmm1, 4
  6533. vpor xmm6, xmm6, xmm2
  6534. vpor xmm7, xmm7, xmm0
  6535. vpor xmm6, xmm6, xmm1
  6536. vpslld xmm0, xmm7, 31
  6537. vpslld xmm1, xmm7, 30
  6538. vpslld xmm2, xmm7, 25
  6539. vpxor xmm0, xmm0, xmm1
  6540. vpxor xmm0, xmm0, xmm2
  6541. vmovdqa xmm1, xmm0
  6542. vpsrldq xmm1, xmm1, 4
  6543. vpslldq xmm0, xmm0, 12
  6544. vpxor xmm7, xmm7, xmm0
  6545. vpsrld xmm2, xmm7, 1
  6546. vpsrld xmm3, xmm7, 2
  6547. vpsrld xmm0, xmm7, 7
  6548. vpxor xmm2, xmm2, xmm3
  6549. vpxor xmm2, xmm2, xmm0
  6550. vpxor xmm2, xmm2, xmm1
  6551. vpxor xmm2, xmm2, xmm7
  6552. vpxor xmm6, xmm6, xmm2
  6553. L_AES_GCM_encrypt_avx1_calc_aad_done:
  6554. ; Calculate counter and H
  6555. vpsrlq xmm9, xmm5, 63
  6556. vpsllq xmm8, xmm5, 1
  6557. vpslldq xmm9, xmm9, 8
  6558. vpor xmm8, xmm8, xmm9
  6559. vpshufd xmm5, xmm5, 255
  6560. vpsrad xmm5, xmm5, 31
  6561. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  6562. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  6563. vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
  6564. vpxor xmm5, xmm5, xmm8
  6565. vmovdqu OWORD PTR [rsp+128], xmm4
  6566. xor ebx, ebx
  6567. cmp r9d, 128
  6568. mov r13d, r9d
  6569. jl L_AES_GCM_encrypt_avx1_done_128
  6570. and r13d, 4294967168
  6571. vmovdqa xmm2, xmm6
  6572. ; H ^ 1
  6573. vmovdqu OWORD PTR [rsp], xmm5
  6574. ; H ^ 2
  6575. vpclmulqdq xmm8, xmm5, xmm5, 0
  6576. vpclmulqdq xmm0, xmm5, xmm5, 17
  6577. vpslld xmm12, xmm8, 31
  6578. vpslld xmm13, xmm8, 30
  6579. vpslld xmm14, xmm8, 25
  6580. vpxor xmm12, xmm12, xmm13
  6581. vpxor xmm12, xmm12, xmm14
  6582. vpsrldq xmm13, xmm12, 4
  6583. vpslldq xmm12, xmm12, 12
  6584. vpxor xmm8, xmm8, xmm12
  6585. vpsrld xmm14, xmm8, 1
  6586. vpsrld xmm10, xmm8, 2
  6587. vpsrld xmm9, xmm8, 7
  6588. vpxor xmm14, xmm14, xmm10
  6589. vpxor xmm14, xmm14, xmm9
  6590. vpxor xmm14, xmm14, xmm13
  6591. vpxor xmm14, xmm14, xmm8
  6592. vpxor xmm0, xmm0, xmm14
  6593. vmovdqu OWORD PTR [rsp+16], xmm0
  6594. ; H ^ 3
  6595. ; ghash_gfmul_red_avx
  6596. vpshufd xmm9, xmm5, 78
  6597. vpshufd xmm10, xmm0, 78
  6598. vpclmulqdq xmm11, xmm0, xmm5, 17
  6599. vpclmulqdq xmm8, xmm0, xmm5, 0
  6600. vpxor xmm9, xmm9, xmm5
  6601. vpxor xmm10, xmm10, xmm0
  6602. vpclmulqdq xmm9, xmm9, xmm10, 0
  6603. vpxor xmm9, xmm9, xmm8
  6604. vpxor xmm9, xmm9, xmm11
  6605. vpslldq xmm10, xmm9, 8
  6606. vpsrldq xmm9, xmm9, 8
  6607. vpxor xmm8, xmm8, xmm10
  6608. vpxor xmm1, xmm11, xmm9
  6609. vpslld xmm12, xmm8, 31
  6610. vpslld xmm13, xmm8, 30
  6611. vpslld xmm14, xmm8, 25
  6612. vpxor xmm12, xmm12, xmm13
  6613. vpxor xmm12, xmm12, xmm14
  6614. vpsrldq xmm13, xmm12, 4
  6615. vpslldq xmm12, xmm12, 12
  6616. vpxor xmm8, xmm8, xmm12
  6617. vpsrld xmm14, xmm8, 1
  6618. vpsrld xmm10, xmm8, 2
  6619. vpsrld xmm9, xmm8, 7
  6620. vpxor xmm14, xmm14, xmm10
  6621. vpxor xmm14, xmm14, xmm9
  6622. vpxor xmm14, xmm14, xmm13
  6623. vpxor xmm14, xmm14, xmm8
  6624. vpxor xmm1, xmm1, xmm14
  6625. vmovdqu OWORD PTR [rsp+32], xmm1
  6626. ; H ^ 4
  6627. vpclmulqdq xmm8, xmm0, xmm0, 0
  6628. vpclmulqdq xmm3, xmm0, xmm0, 17
  6629. vpslld xmm12, xmm8, 31
  6630. vpslld xmm13, xmm8, 30
  6631. vpslld xmm14, xmm8, 25
  6632. vpxor xmm12, xmm12, xmm13
  6633. vpxor xmm12, xmm12, xmm14
  6634. vpsrldq xmm13, xmm12, 4
  6635. vpslldq xmm12, xmm12, 12
  6636. vpxor xmm8, xmm8, xmm12
  6637. vpsrld xmm14, xmm8, 1
  6638. vpsrld xmm10, xmm8, 2
  6639. vpsrld xmm9, xmm8, 7
  6640. vpxor xmm14, xmm14, xmm10
  6641. vpxor xmm14, xmm14, xmm9
  6642. vpxor xmm14, xmm14, xmm13
  6643. vpxor xmm14, xmm14, xmm8
  6644. vpxor xmm3, xmm3, xmm14
  6645. vmovdqu OWORD PTR [rsp+48], xmm3
  6646. ; H ^ 5
  6647. ; ghash_gfmul_red_avx
  ; Multiplies xmm0 by xmm1 (carry-less, then reduced) and stores the result
  ; at [rsp+64]. Going by the section label the operands are expected to be
  ; H^2 (xmm0) and H^3 (xmm1); both are set before this point.
  ; Three-multiply (Karatsuba-style) product:
  ;   xmm11 = hi*hi, xmm8 = lo*lo, xmm9 = (hi^lo of xmm0) * (hi^lo of xmm1).
  ; vpshufd with immediate 78 swaps the two 64-bit halves, so the following
  ; vpxor leaves hi^lo in both halves.
  ; Scratch: xmm7-xmm14.
  6648. vpshufd xmm9, xmm0, 78
  6649. vpshufd xmm10, xmm1, 78
  6650. vpclmulqdq xmm11, xmm1, xmm0, 17
  6651. vpclmulqdq xmm8, xmm1, xmm0, 0
  6652. vpxor xmm9, xmm9, xmm0
  6653. vpxor xmm10, xmm10, xmm1
  6654. vpclmulqdq xmm9, xmm9, xmm10, 0
  ; Middle term = xmm9 ^ lo*lo ^ hi*hi; its low 64 bits go to the top of the
  ; low half (xmm8), its high 64 bits to the bottom of the high half (xmm7).
  6655. vpxor xmm9, xmm9, xmm8
  6656. vpxor xmm9, xmm9, xmm11
  6657. vpslldq xmm10, xmm9, 8
  6658. vpsrldq xmm9, xmm9, 8
  6659. vpxor xmm8, xmm8, xmm10
  6660. vpxor xmm7, xmm11, xmm9
  ; Reduction of the low half (xmm8) into the high half (xmm7):
  ; left shifts 31/30/25, byte shifts 4/12, right shifts 1/2/7.
  6661. vpslld xmm12, xmm8, 31
  6662. vpslld xmm13, xmm8, 30
  6663. vpslld xmm14, xmm8, 25
  6664. vpxor xmm12, xmm12, xmm13
  6665. vpxor xmm12, xmm12, xmm14
  6666. vpsrldq xmm13, xmm12, 4
  6667. vpslldq xmm12, xmm12, 12
  6668. vpxor xmm8, xmm8, xmm12
  6669. vpsrld xmm14, xmm8, 1
  6670. vpsrld xmm10, xmm8, 2
  6671. vpsrld xmm9, xmm8, 7
  6672. vpxor xmm14, xmm14, xmm10
  6673. vpxor xmm14, xmm14, xmm9
  6674. vpxor xmm14, xmm14, xmm13
  6675. vpxor xmm14, xmm14, xmm8
  6676. vpxor xmm7, xmm7, xmm14
  ; Save the result in the stack table slot at [rsp+64].
  6677. vmovdqu OWORD PTR [rsp+64], xmm7
  6678. ; H ^ 6
  ; Squares xmm1 (expected to hold H^3, going by the section label) and stores
  ; the reduced result at [rsp+80]. xmm7 is only a temporary here.
  ; Squaring uses just lo*lo (imm 0) and hi*hi (imm 17); cross terms cancel.
  ; Scratch: xmm7-xmm10, xmm12-xmm14.
  6679. vpclmulqdq xmm8, xmm1, xmm1, 0
  6680. vpclmulqdq xmm7, xmm1, xmm1, 17
  ; Reduction of the low half (xmm8) into the high half (xmm7):
  ; left shifts 31/30/25, byte shifts 4/12, right shifts 1/2/7.
  6681. vpslld xmm12, xmm8, 31
  6682. vpslld xmm13, xmm8, 30
  6683. vpslld xmm14, xmm8, 25
  6684. vpxor xmm12, xmm12, xmm13
  6685. vpxor xmm12, xmm12, xmm14
  6686. vpsrldq xmm13, xmm12, 4
  6687. vpslldq xmm12, xmm12, 12
  6688. vpxor xmm8, xmm8, xmm12
  6689. vpsrld xmm14, xmm8, 1
  6690. vpsrld xmm10, xmm8, 2
  6691. vpsrld xmm9, xmm8, 7
  6692. vpxor xmm14, xmm14, xmm10
  6693. vpxor xmm14, xmm14, xmm9
  6694. vpxor xmm14, xmm14, xmm13
  6695. vpxor xmm14, xmm14, xmm8
  6696. vpxor xmm7, xmm7, xmm14
  ; Save the result in the stack table slot at [rsp+80].
  6697. vmovdqu OWORD PTR [rsp+80], xmm7
  6698. ; H ^ 7
  6699. ; ghash_gfmul_red_avx
  ; Multiplies xmm1 by xmm3 (carry-less, then reduced) and stores the result
  ; at [rsp+96]. Going by the section label the operands are expected to be
  ; H^3 (xmm1) and H^4 (xmm3).
  ; Three-multiply (Karatsuba-style) product:
  ;   xmm11 = hi*hi, xmm8 = lo*lo, xmm9 = (hi^lo of xmm1) * (hi^lo of xmm3).
  ; vpshufd with immediate 78 swaps the two 64-bit halves.
  ; Scratch: xmm7-xmm14.
  6700. vpshufd xmm9, xmm1, 78
  6701. vpshufd xmm10, xmm3, 78
  6702. vpclmulqdq xmm11, xmm3, xmm1, 17
  6703. vpclmulqdq xmm8, xmm3, xmm1, 0
  6704. vpxor xmm9, xmm9, xmm1
  6705. vpxor xmm10, xmm10, xmm3
  6706. vpclmulqdq xmm9, xmm9, xmm10, 0
  ; Middle term = xmm9 ^ lo*lo ^ hi*hi, split across the low half (xmm8)
  ; and the high half (xmm7).
  6707. vpxor xmm9, xmm9, xmm8
  6708. vpxor xmm9, xmm9, xmm11
  6709. vpslldq xmm10, xmm9, 8
  6710. vpsrldq xmm9, xmm9, 8
  6711. vpxor xmm8, xmm8, xmm10
  6712. vpxor xmm7, xmm11, xmm9
  ; Reduction of the low half (xmm8) into the high half (xmm7):
  ; left shifts 31/30/25, byte shifts 4/12, right shifts 1/2/7.
  6713. vpslld xmm12, xmm8, 31
  6714. vpslld xmm13, xmm8, 30
  6715. vpslld xmm14, xmm8, 25
  6716. vpxor xmm12, xmm12, xmm13
  6717. vpxor xmm12, xmm12, xmm14
  6718. vpsrldq xmm13, xmm12, 4
  6719. vpslldq xmm12, xmm12, 12
  6720. vpxor xmm8, xmm8, xmm12
  6721. vpsrld xmm14, xmm8, 1
  6722. vpsrld xmm10, xmm8, 2
  6723. vpsrld xmm9, xmm8, 7
  6724. vpxor xmm14, xmm14, xmm10
  6725. vpxor xmm14, xmm14, xmm9
  6726. vpxor xmm14, xmm14, xmm13
  6727. vpxor xmm14, xmm14, xmm8
  6728. vpxor xmm7, xmm7, xmm14
  ; Save the result in the stack table slot at [rsp+96].
  6729. vmovdqu OWORD PTR [rsp+96], xmm7
  6730. ; H ^ 8
  ; Squares xmm3 (expected to hold H^4, going by the section label) and stores
  ; the reduced result at [rsp+112], the highest slot of the 16-byte-stride
  ; table written by the H ^ 4 .. H ^ 8 sections ([rsp+48] .. [rsp+112]).
  ; Squaring uses just lo*lo (imm 0) and hi*hi (imm 17); cross terms cancel.
  ; Scratch: xmm7-xmm10, xmm12-xmm14.
  6731. vpclmulqdq xmm8, xmm3, xmm3, 0
  6732. vpclmulqdq xmm7, xmm3, xmm3, 17
  ; Reduction of the low half (xmm8) into the high half (xmm7):
  ; left shifts 31/30/25, byte shifts 4/12, right shifts 1/2/7.
  6733. vpslld xmm12, xmm8, 31
  6734. vpslld xmm13, xmm8, 30
  6735. vpslld xmm14, xmm8, 25
  6736. vpxor xmm12, xmm12, xmm13
  6737. vpxor xmm12, xmm12, xmm14
  6738. vpsrldq xmm13, xmm12, 4
  6739. vpslldq xmm12, xmm12, 12
  6740. vpxor xmm8, xmm8, xmm12
  6741. vpsrld xmm14, xmm8, 1
  6742. vpsrld xmm10, xmm8, 2
  6743. vpsrld xmm9, xmm8, 7
  6744. vpxor xmm14, xmm14, xmm10
  6745. vpxor xmm14, xmm14, xmm9
  6746. vpxor xmm14, xmm14, xmm13
  6747. vpxor xmm14, xmm14, xmm8
  6748. vpxor xmm7, xmm7, xmm14
  ; Save the result in the stack table slot at [rsp+112].
  6749. vmovdqu OWORD PTR [rsp+112], xmm7
  6750. ; First 128 bytes of input
  ; Builds eight counter blocks and runs them through the AES rounds in
  ; parallel (xmm8..xmm15). No GHASH work is interleaved in this first pass.
  ;   [rsp+128] : running counter block (read, then written back advanced)
  ;   r15       : base of the round-key array, 16 bytes per round key
  ;   r10d      : round-count selector (compared against 11 and 13 below)
  ; xmm8 = shuffle(counter); xmm9..xmm15 = shuffle(counter + L_avx1_aes_gcm_one
  ; .. _seven). The shuffle mask and the add constants are defined elsewhere;
  ; presumably a per-64-bit byte swap and the values 1..8 in the counter lane
  ; - confirm against their definitions.
  6751. vmovdqu xmm0, OWORD PTR [rsp+128]
  6752. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  6753. vpshufb xmm8, xmm0, xmm1
  6754. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  6755. vpshufb xmm9, xmm9, xmm1
  6756. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  6757. vpshufb xmm10, xmm10, xmm1
  6758. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  6759. vpshufb xmm11, xmm11, xmm1
  6760. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  6761. vpshufb xmm12, xmm12, xmm1
  6762. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  6763. vpshufb xmm13, xmm13, xmm1
  6764. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  6765. vpshufb xmm14, xmm14, xmm1
  6766. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  6767. vpshufb xmm15, xmm15, xmm1
  ; Advance the stored counter past the eight blocks just generated.
  6768. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  6769. vmovdqa xmm7, OWORD PTR [r15]
  6770. vmovdqu OWORD PTR [rsp+128], xmm0
  ; Initial AddRoundKey: XOR every block with the round key at [r15].
  6771. vpxor xmm8, xmm8, xmm7
  6772. vpxor xmm9, xmm9, xmm7
  6773. vpxor xmm10, xmm10, xmm7
  6774. vpxor xmm11, xmm11, xmm7
  6775. vpxor xmm12, xmm12, xmm7
  6776. vpxor xmm13, xmm13, xmm7
  6777. vpxor xmm14, xmm14, xmm7
  6778. vpxor xmm15, xmm15, xmm7
  ; Nine full rounds with the keys at [r15+16] .. [r15+144]; each key is
  ; loaded once into xmm7 and applied to all eight blocks.
  6779. vmovdqa xmm7, OWORD PTR [r15+16]
  6780. vaesenc xmm8, xmm8, xmm7
  6781. vaesenc xmm9, xmm9, xmm7
  6782. vaesenc xmm10, xmm10, xmm7
  6783. vaesenc xmm11, xmm11, xmm7
  6784. vaesenc xmm12, xmm12, xmm7
  6785. vaesenc xmm13, xmm13, xmm7
  6786. vaesenc xmm14, xmm14, xmm7
  6787. vaesenc xmm15, xmm15, xmm7
  6788. vmovdqa xmm7, OWORD PTR [r15+32]
  6789. vaesenc xmm8, xmm8, xmm7
  6790. vaesenc xmm9, xmm9, xmm7
  6791. vaesenc xmm10, xmm10, xmm7
  6792. vaesenc xmm11, xmm11, xmm7
  6793. vaesenc xmm12, xmm12, xmm7
  6794. vaesenc xmm13, xmm13, xmm7
  6795. vaesenc xmm14, xmm14, xmm7
  6796. vaesenc xmm15, xmm15, xmm7
  6797. vmovdqa xmm7, OWORD PTR [r15+48]
  6798. vaesenc xmm8, xmm8, xmm7
  6799. vaesenc xmm9, xmm9, xmm7
  6800. vaesenc xmm10, xmm10, xmm7
  6801. vaesenc xmm11, xmm11, xmm7
  6802. vaesenc xmm12, xmm12, xmm7
  6803. vaesenc xmm13, xmm13, xmm7
  6804. vaesenc xmm14, xmm14, xmm7
  6805. vaesenc xmm15, xmm15, xmm7
  6806. vmovdqa xmm7, OWORD PTR [r15+64]
  6807. vaesenc xmm8, xmm8, xmm7
  6808. vaesenc xmm9, xmm9, xmm7
  6809. vaesenc xmm10, xmm10, xmm7
  6810. vaesenc xmm11, xmm11, xmm7
  6811. vaesenc xmm12, xmm12, xmm7
  6812. vaesenc xmm13, xmm13, xmm7
  6813. vaesenc xmm14, xmm14, xmm7
  6814. vaesenc xmm15, xmm15, xmm7
  6815. vmovdqa xmm7, OWORD PTR [r15+80]
  6816. vaesenc xmm8, xmm8, xmm7
  6817. vaesenc xmm9, xmm9, xmm7
  6818. vaesenc xmm10, xmm10, xmm7
  6819. vaesenc xmm11, xmm11, xmm7
  6820. vaesenc xmm12, xmm12, xmm7
  6821. vaesenc xmm13, xmm13, xmm7
  6822. vaesenc xmm14, xmm14, xmm7
  6823. vaesenc xmm15, xmm15, xmm7
  6824. vmovdqa xmm7, OWORD PTR [r15+96]
  6825. vaesenc xmm8, xmm8, xmm7
  6826. vaesenc xmm9, xmm9, xmm7
  6827. vaesenc xmm10, xmm10, xmm7
  6828. vaesenc xmm11, xmm11, xmm7
  6829. vaesenc xmm12, xmm12, xmm7
  6830. vaesenc xmm13, xmm13, xmm7
  6831. vaesenc xmm14, xmm14, xmm7
  6832. vaesenc xmm15, xmm15, xmm7
  6833. vmovdqa xmm7, OWORD PTR [r15+112]
  6834. vaesenc xmm8, xmm8, xmm7
  6835. vaesenc xmm9, xmm9, xmm7
  6836. vaesenc xmm10, xmm10, xmm7
  6837. vaesenc xmm11, xmm11, xmm7
  6838. vaesenc xmm12, xmm12, xmm7
  6839. vaesenc xmm13, xmm13, xmm7
  6840. vaesenc xmm14, xmm14, xmm7
  6841. vaesenc xmm15, xmm15, xmm7
  6842. vmovdqa xmm7, OWORD PTR [r15+128]
  6843. vaesenc xmm8, xmm8, xmm7
  6844. vaesenc xmm9, xmm9, xmm7
  6845. vaesenc xmm10, xmm10, xmm7
  6846. vaesenc xmm11, xmm11, xmm7
  6847. vaesenc xmm12, xmm12, xmm7
  6848. vaesenc xmm13, xmm13, xmm7
  6849. vaesenc xmm14, xmm14, xmm7
  6850. vaesenc xmm15, xmm15, xmm7
  6851. vmovdqa xmm7, OWORD PTR [r15+144]
  6852. vaesenc xmm8, xmm8, xmm7
  6853. vaesenc xmm9, xmm9, xmm7
  6854. vaesenc xmm10, xmm10, xmm7
  6855. vaesenc xmm11, xmm11, xmm7
  6856. vaesenc xmm12, xmm12, xmm7
  6857. vaesenc xmm13, xmm13, xmm7
  6858. vaesenc xmm14, xmm14, xmm7
  6859. vaesenc xmm15, xmm15, xmm7
  ; Round-count dispatch. The next key is loaded into xmm7 BEFORE the branch
  ; (vmovdqa does not alter flags), so on the taken path xmm7 already holds
  ; the final-round key: [r15+160] when r10d < 11.
  6860. cmp r10d, 11
  6861. vmovdqa xmm7, OWORD PTR [r15+160]
  6862. jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
  6863. vaesenc xmm8, xmm8, xmm7
  6864. vaesenc xmm9, xmm9, xmm7
  6865. vaesenc xmm10, xmm10, xmm7
  6866. vaesenc xmm11, xmm11, xmm7
  6867. vaesenc xmm12, xmm12, xmm7
  6868. vaesenc xmm13, xmm13, xmm7
  6869. vaesenc xmm14, xmm14, xmm7
  6870. vaesenc xmm15, xmm15, xmm7
  6871. vmovdqa xmm7, OWORD PTR [r15+176]
  6872. vaesenc xmm8, xmm8, xmm7
  6873. vaesenc xmm9, xmm9, xmm7
  6874. vaesenc xmm10, xmm10, xmm7
  6875. vaesenc xmm11, xmm11, xmm7
  6876. vaesenc xmm12, xmm12, xmm7
  6877. vaesenc xmm13, xmm13, xmm7
  6878. vaesenc xmm14, xmm14, xmm7
  6879. vaesenc xmm15, xmm15, xmm7
  ; Same pattern: final-round key is [r15+192] when r10d < 13.
  6880. cmp r10d, 13
  6881. vmovdqa xmm7, OWORD PTR [r15+192]
  6882. jl L_AES_GCM_encrypt_avx1_aesenc_128_enc_done
  6883. vaesenc xmm8, xmm8, xmm7
  6884. vaesenc xmm9, xmm9, xmm7
  6885. vaesenc xmm10, xmm10, xmm7
  6886. vaesenc xmm11, xmm11, xmm7
  6887. vaesenc xmm12, xmm12, xmm7
  6888. vaesenc xmm13, xmm13, xmm7
  6889. vaesenc xmm14, xmm14, xmm7
  6890. vaesenc xmm15, xmm15, xmm7
  6891. vmovdqa xmm7, OWORD PTR [r15+208]
  6892. vaesenc xmm8, xmm8, xmm7
  6893. vaesenc xmm9, xmm9, xmm7
  6894. vaesenc xmm10, xmm10, xmm7
  6895. vaesenc xmm11, xmm11, xmm7
  6896. vaesenc xmm12, xmm12, xmm7
  6897. vaesenc xmm13, xmm13, xmm7
  6898. vaesenc xmm14, xmm14, xmm7
  6899. vaesenc xmm15, xmm15, xmm7
  ; Otherwise the final-round key is [r15+224]; execution falls through to
  ; the label that follows with xmm7 holding it.
  6900. vmovdqa xmm7, OWORD PTR [r15+224]
  ; Finishes the first eight blocks: final AES round with the key in xmm7,
  ; XOR with 128 bytes of input at [rdi], store 128 bytes of output at [rsi].
  ; Work is done two blocks at a time; xmm0/xmm1 hold the input blocks.
  ; xmm8..xmm15 still hold the output blocks afterwards (the code at
  ; L_AES_GCM_encrypt_avx1_end_128 reads them).
  6901. L_AES_GCM_encrypt_avx1_aesenc_128_enc_done:
  6902. vaesenclast xmm8, xmm8, xmm7
  6903. vaesenclast xmm9, xmm9, xmm7
  6904. vmovdqu xmm0, OWORD PTR [rdi]
  6905. vmovdqu xmm1, OWORD PTR [rdi+16]
  6906. vpxor xmm8, xmm8, xmm0
  6907. vpxor xmm9, xmm9, xmm1
  6908. vmovdqu OWORD PTR [rsi], xmm8
  6909. vmovdqu OWORD PTR [rsi+16], xmm9
  6910. vaesenclast xmm10, xmm10, xmm7
  6911. vaesenclast xmm11, xmm11, xmm7
  6912. vmovdqu xmm0, OWORD PTR [rdi+32]
  6913. vmovdqu xmm1, OWORD PTR [rdi+48]
  6914. vpxor xmm10, xmm10, xmm0
  6915. vpxor xmm11, xmm11, xmm1
  6916. vmovdqu OWORD PTR [rsi+32], xmm10
  6917. vmovdqu OWORD PTR [rsi+48], xmm11
  6918. vaesenclast xmm12, xmm12, xmm7
  6919. vaesenclast xmm13, xmm13, xmm7
  6920. vmovdqu xmm0, OWORD PTR [rdi+64]
  6921. vmovdqu xmm1, OWORD PTR [rdi+80]
  6922. vpxor xmm12, xmm12, xmm0
  6923. vpxor xmm13, xmm13, xmm1
  6924. vmovdqu OWORD PTR [rsi+64], xmm12
  6925. vmovdqu OWORD PTR [rsi+80], xmm13
  6926. vaesenclast xmm14, xmm14, xmm7
  6927. vaesenclast xmm15, xmm15, xmm7
  6928. vmovdqu xmm0, OWORD PTR [rdi+96]
  6929. vmovdqu xmm1, OWORD PTR [rdi+112]
  6930. vpxor xmm14, xmm14, xmm0
  6931. vpxor xmm15, xmm15, xmm1
  6932. vmovdqu OWORD PTR [rsi+96], xmm14
  6933. vmovdqu OWORD PTR [rsi+112], xmm15
  ; ebx = byte offset processed so far (128). If r13d <= 128 there is no
  ; further 128-byte chunk, so skip the combined AES+GHASH loop. r13d is set
  ; before this point; presumably the length rounded down to a multiple of
  ; 128 - confirm. mov does not alter flags, so jle tests the cmp above.
  ; NOTE(review): jle is a signed comparison; a count >= 2^31 would compare
  ; as negative - confirm callers bound the length.
  6934. cmp r13d, 128
  6935. mov ebx, 128
  6936. jle L_AES_GCM_encrypt_avx1_end_128
  6937. ; More 128 bytes of input
  ; Loop body for each further 128-byte chunk. It interleaves two independent
  ; jobs so AES and carry-less-multiply instructions can overlap:
  ;   (a) AES rounds for eight new counter blocks (xmm8..xmm15);
  ;   (b) GHASH over the 128 bytes of output stored just before this chunk
  ;       ([rdx-128] .. [rdx-16]), using the table at [rsp+112] .. [rsp].
  ; Registers:
  ;   rcx = rdi + rbx (input for this chunk), rdx = rsi + rbx (output)
  ;   xmm2 = running GHASH value on entry and on exit
  ;   xmm3 = accumulated hi*hi products, xmm1 = accumulated middle terms
  ;   xmm0, xmm4-xmm7 = per-block scratch
  6938. L_AES_GCM_encrypt_avx1_ghash_128:
  6939. lea rcx, QWORD PTR [rdi+rbx]
  6940. lea rdx, QWORD PTR [rsi+rbx]
  ; Eight counter blocks from the counter at [rsp+128]; the counter is
  ; written back advanced by L_avx1_aes_gcm_eight.
  6941. vmovdqu xmm0, OWORD PTR [rsp+128]
  6942. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  6943. vpshufb xmm8, xmm0, xmm1
  6944. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  6945. vpshufb xmm9, xmm9, xmm1
  6946. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  6947. vpshufb xmm10, xmm10, xmm1
  6948. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  6949. vpshufb xmm11, xmm11, xmm1
  6950. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  6951. vpshufb xmm12, xmm12, xmm1
  6952. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  6953. vpshufb xmm13, xmm13, xmm1
  6954. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  6955. vpshufb xmm14, xmm14, xmm1
  6956. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  6957. vpshufb xmm15, xmm15, xmm1
  6958. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  6959. vmovdqa xmm7, OWORD PTR [r15]
  6960. vmovdqu OWORD PTR [rsp+128], xmm0
  ; Initial AddRoundKey with the key at [r15].
  6961. vpxor xmm8, xmm8, xmm7
  6962. vpxor xmm9, xmm9, xmm7
  6963. vpxor xmm10, xmm10, xmm7
  6964. vpxor xmm11, xmm11, xmm7
  6965. vpxor xmm12, xmm12, xmm7
  6966. vpxor xmm13, xmm13, xmm7
  6967. vpxor xmm14, xmm14, xmm7
  6968. vpxor xmm15, xmm15, xmm7
  ; GHASH block 1 of 8 x table entry [rsp+112], interleaved with AES round 1.
  ; The running hash (xmm2) is XORed into this first block only. This block
  ; initialises the accumulators: xmm3 = hi*hi, xmm2 = lo*lo, xmm1 = middle.
  6969. vmovdqu xmm7, OWORD PTR [rsp+112]
  6970. vmovdqu xmm0, OWORD PTR [rdx+-128]
  6971. vaesenc xmm8, xmm8, [r15+16]
  6972. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6973. vpxor xmm0, xmm0, xmm2
  6974. vpshufd xmm1, xmm7, 78
  6975. vpshufd xmm5, xmm0, 78
  6976. vpxor xmm1, xmm1, xmm7
  6977. vpxor xmm5, xmm5, xmm0
  6978. vpclmulqdq xmm3, xmm0, xmm7, 17
  6979. vaesenc xmm9, xmm9, [r15+16]
  6980. vaesenc xmm10, xmm10, [r15+16]
  6981. vpclmulqdq xmm2, xmm0, xmm7, 0
  6982. vaesenc xmm11, xmm11, [r15+16]
  6983. vaesenc xmm12, xmm12, [r15+16]
  6984. vpclmulqdq xmm1, xmm1, xmm5, 0
  6985. vaesenc xmm13, xmm13, [r15+16]
  6986. vaesenc xmm14, xmm14, [r15+16]
  6987. vaesenc xmm15, xmm15, [r15+16]
  6988. vpxor xmm1, xmm1, xmm2
  6989. vpxor xmm1, xmm1, xmm3
  ; GHASH block 2 x [rsp+96], interleaved with AES round 2. From here on each
  ; block's products (xmm6 = hi*hi, xmm7 = lo*lo, xmm4 = Karatsuba middle)
  ; are XORed into the accumulators without reducing.
  6990. vmovdqu xmm7, OWORD PTR [rsp+96]
  6991. vmovdqu xmm0, OWORD PTR [rdx+-112]
  6992. vpshufd xmm4, xmm7, 78
  6993. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  6994. vaesenc xmm8, xmm8, [r15+32]
  6995. vpxor xmm4, xmm4, xmm7
  6996. vpshufd xmm5, xmm0, 78
  6997. vpxor xmm5, xmm5, xmm0
  6998. vpclmulqdq xmm6, xmm0, xmm7, 17
  6999. vaesenc xmm9, xmm9, [r15+32]
  7000. vaesenc xmm10, xmm10, [r15+32]
  7001. vpclmulqdq xmm7, xmm0, xmm7, 0
  7002. vaesenc xmm11, xmm11, [r15+32]
  7003. vaesenc xmm12, xmm12, [r15+32]
  7004. vpclmulqdq xmm4, xmm4, xmm5, 0
  7005. vaesenc xmm13, xmm13, [r15+32]
  7006. vaesenc xmm14, xmm14, [r15+32]
  7007. vaesenc xmm15, xmm15, [r15+32]
  7008. vpxor xmm1, xmm1, xmm7
  7009. vpxor xmm2, xmm2, xmm7
  7010. vpxor xmm1, xmm1, xmm6
  7011. vpxor xmm3, xmm3, xmm6
  7012. vpxor xmm1, xmm1, xmm4
  ; GHASH block 3 x [rsp+80], interleaved with AES round 3.
  7013. vmovdqu xmm7, OWORD PTR [rsp+80]
  7014. vmovdqu xmm0, OWORD PTR [rdx+-96]
  7015. vpshufd xmm4, xmm7, 78
  7016. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7017. vaesenc xmm8, xmm8, [r15+48]
  7018. vpxor xmm4, xmm4, xmm7
  7019. vpshufd xmm5, xmm0, 78
  7020. vpxor xmm5, xmm5, xmm0
  7021. vpclmulqdq xmm6, xmm0, xmm7, 17
  7022. vaesenc xmm9, xmm9, [r15+48]
  7023. vaesenc xmm10, xmm10, [r15+48]
  7024. vpclmulqdq xmm7, xmm0, xmm7, 0
  7025. vaesenc xmm11, xmm11, [r15+48]
  7026. vaesenc xmm12, xmm12, [r15+48]
  7027. vpclmulqdq xmm4, xmm4, xmm5, 0
  7028. vaesenc xmm13, xmm13, [r15+48]
  7029. vaesenc xmm14, xmm14, [r15+48]
  7030. vaesenc xmm15, xmm15, [r15+48]
  7031. vpxor xmm1, xmm1, xmm7
  7032. vpxor xmm2, xmm2, xmm7
  7033. vpxor xmm1, xmm1, xmm6
  7034. vpxor xmm3, xmm3, xmm6
  7035. vpxor xmm1, xmm1, xmm4
  ; GHASH block 4 x [rsp+64], interleaved with AES round 4.
  7036. vmovdqu xmm7, OWORD PTR [rsp+64]
  7037. vmovdqu xmm0, OWORD PTR [rdx+-80]
  7038. vpshufd xmm4, xmm7, 78
  7039. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7040. vaesenc xmm8, xmm8, [r15+64]
  7041. vpxor xmm4, xmm4, xmm7
  7042. vpshufd xmm5, xmm0, 78
  7043. vpxor xmm5, xmm5, xmm0
  7044. vpclmulqdq xmm6, xmm0, xmm7, 17
  7045. vaesenc xmm9, xmm9, [r15+64]
  7046. vaesenc xmm10, xmm10, [r15+64]
  7047. vpclmulqdq xmm7, xmm0, xmm7, 0
  7048. vaesenc xmm11, xmm11, [r15+64]
  7049. vaesenc xmm12, xmm12, [r15+64]
  7050. vpclmulqdq xmm4, xmm4, xmm5, 0
  7051. vaesenc xmm13, xmm13, [r15+64]
  7052. vaesenc xmm14, xmm14, [r15+64]
  7053. vaesenc xmm15, xmm15, [r15+64]
  7054. vpxor xmm1, xmm1, xmm7
  7055. vpxor xmm2, xmm2, xmm7
  7056. vpxor xmm1, xmm1, xmm6
  7057. vpxor xmm3, xmm3, xmm6
  7058. vpxor xmm1, xmm1, xmm4
  ; GHASH block 5 x [rsp+48], interleaved with AES round 5.
  7059. vmovdqu xmm7, OWORD PTR [rsp+48]
  7060. vmovdqu xmm0, OWORD PTR [rdx+-64]
  7061. vpshufd xmm4, xmm7, 78
  7062. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7063. vaesenc xmm8, xmm8, [r15+80]
  7064. vpxor xmm4, xmm4, xmm7
  7065. vpshufd xmm5, xmm0, 78
  7066. vpxor xmm5, xmm5, xmm0
  7067. vpclmulqdq xmm6, xmm0, xmm7, 17
  7068. vaesenc xmm9, xmm9, [r15+80]
  7069. vaesenc xmm10, xmm10, [r15+80]
  7070. vpclmulqdq xmm7, xmm0, xmm7, 0
  7071. vaesenc xmm11, xmm11, [r15+80]
  7072. vaesenc xmm12, xmm12, [r15+80]
  7073. vpclmulqdq xmm4, xmm4, xmm5, 0
  7074. vaesenc xmm13, xmm13, [r15+80]
  7075. vaesenc xmm14, xmm14, [r15+80]
  7076. vaesenc xmm15, xmm15, [r15+80]
  7077. vpxor xmm1, xmm1, xmm7
  7078. vpxor xmm2, xmm2, xmm7
  7079. vpxor xmm1, xmm1, xmm6
  7080. vpxor xmm3, xmm3, xmm6
  7081. vpxor xmm1, xmm1, xmm4
  ; GHASH block 6 x [rsp+32], interleaved with AES round 6.
  7082. vmovdqu xmm7, OWORD PTR [rsp+32]
  7083. vmovdqu xmm0, OWORD PTR [rdx+-48]
  7084. vpshufd xmm4, xmm7, 78
  7085. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7086. vaesenc xmm8, xmm8, [r15+96]
  7087. vpxor xmm4, xmm4, xmm7
  7088. vpshufd xmm5, xmm0, 78
  7089. vpxor xmm5, xmm5, xmm0
  7090. vpclmulqdq xmm6, xmm0, xmm7, 17
  7091. vaesenc xmm9, xmm9, [r15+96]
  7092. vaesenc xmm10, xmm10, [r15+96]
  7093. vpclmulqdq xmm7, xmm0, xmm7, 0
  7094. vaesenc xmm11, xmm11, [r15+96]
  7095. vaesenc xmm12, xmm12, [r15+96]
  7096. vpclmulqdq xmm4, xmm4, xmm5, 0
  7097. vaesenc xmm13, xmm13, [r15+96]
  7098. vaesenc xmm14, xmm14, [r15+96]
  7099. vaesenc xmm15, xmm15, [r15+96]
  7100. vpxor xmm1, xmm1, xmm7
  7101. vpxor xmm2, xmm2, xmm7
  7102. vpxor xmm1, xmm1, xmm6
  7103. vpxor xmm3, xmm3, xmm6
  7104. vpxor xmm1, xmm1, xmm4
  ; GHASH block 7 x [rsp+16], interleaved with AES round 7.
  7105. vmovdqu xmm7, OWORD PTR [rsp+16]
  7106. vmovdqu xmm0, OWORD PTR [rdx+-32]
  7107. vpshufd xmm4, xmm7, 78
  7108. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7109. vaesenc xmm8, xmm8, [r15+112]
  7110. vpxor xmm4, xmm4, xmm7
  7111. vpshufd xmm5, xmm0, 78
  7112. vpxor xmm5, xmm5, xmm0
  7113. vpclmulqdq xmm6, xmm0, xmm7, 17
  7114. vaesenc xmm9, xmm9, [r15+112]
  7115. vaesenc xmm10, xmm10, [r15+112]
  7116. vpclmulqdq xmm7, xmm0, xmm7, 0
  7117. vaesenc xmm11, xmm11, [r15+112]
  7118. vaesenc xmm12, xmm12, [r15+112]
  7119. vpclmulqdq xmm4, xmm4, xmm5, 0
  7120. vaesenc xmm13, xmm13, [r15+112]
  7121. vaesenc xmm14, xmm14, [r15+112]
  7122. vaesenc xmm15, xmm15, [r15+112]
  7123. vpxor xmm1, xmm1, xmm7
  7124. vpxor xmm2, xmm2, xmm7
  7125. vpxor xmm1, xmm1, xmm6
  7126. vpxor xmm3, xmm3, xmm6
  7127. vpxor xmm1, xmm1, xmm4
  ; GHASH block 8 x [rsp], interleaved with AES round 8.
  7128. vmovdqu xmm7, OWORD PTR [rsp]
  7129. vmovdqu xmm0, OWORD PTR [rdx+-16]
  7130. vpshufd xmm4, xmm7, 78
  7131. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7132. vaesenc xmm8, xmm8, [r15+128]
  7133. vpxor xmm4, xmm4, xmm7
  7134. vpshufd xmm5, xmm0, 78
  7135. vpxor xmm5, xmm5, xmm0
  7136. vpclmulqdq xmm6, xmm0, xmm7, 17
  7137. vaesenc xmm9, xmm9, [r15+128]
  7138. vaesenc xmm10, xmm10, [r15+128]
  7139. vpclmulqdq xmm7, xmm0, xmm7, 0
  7140. vaesenc xmm11, xmm11, [r15+128]
  7141. vaesenc xmm12, xmm12, [r15+128]
  7142. vpclmulqdq xmm4, xmm4, xmm5, 0
  7143. vaesenc xmm13, xmm13, [r15+128]
  7144. vaesenc xmm14, xmm14, [r15+128]
  7145. vaesenc xmm15, xmm15, [r15+128]
  7146. vpxor xmm1, xmm1, xmm7
  7147. vpxor xmm2, xmm2, xmm7
  7148. vpxor xmm1, xmm1, xmm6
  7149. vpxor xmm3, xmm3, xmm6
  7150. vpxor xmm1, xmm1, xmm4
  ; One reduction for all eight products, interleaved with AES round 9:
  ; split the middle accumulator (xmm1) across low (xmm2) and high (xmm3),
  ; reduce xmm2 with the 31/30/25 then 1/2/7 shift sequence, and fold in
  ; xmm3. Result: xmm2 = updated running GHASH value.
  7151. vpslldq xmm5, xmm1, 8
  7152. vpsrldq xmm1, xmm1, 8
  7153. vaesenc xmm8, xmm8, [r15+144]
  7154. vpxor xmm2, xmm2, xmm5
  7155. vpxor xmm3, xmm3, xmm1
  7156. vaesenc xmm9, xmm9, [r15+144]
  7157. vpslld xmm7, xmm2, 31
  7158. vpslld xmm4, xmm2, 30
  7159. vpslld xmm5, xmm2, 25
  7160. vaesenc xmm10, xmm10, [r15+144]
  7161. vpxor xmm7, xmm7, xmm4
  7162. vpxor xmm7, xmm7, xmm5
  7163. vaesenc xmm11, xmm11, [r15+144]
  7164. vpsrldq xmm4, xmm7, 4
  7165. vpslldq xmm7, xmm7, 12
  7166. vaesenc xmm12, xmm12, [r15+144]
  7167. vpxor xmm2, xmm2, xmm7
  7168. vpsrld xmm5, xmm2, 1
  7169. vaesenc xmm13, xmm13, [r15+144]
  7170. vpsrld xmm1, xmm2, 2
  7171. vpsrld xmm0, xmm2, 7
  7172. vaesenc xmm14, xmm14, [r15+144]
  7173. vpxor xmm5, xmm5, xmm1
  7174. vpxor xmm5, xmm5, xmm0
  7175. vaesenc xmm15, xmm15, [r15+144]
  7176. vpxor xmm5, xmm5, xmm4
  7177. vpxor xmm2, xmm2, xmm5
  7178. vpxor xmm2, xmm2, xmm3
  ; Round-count dispatch: xmm7 is loaded with the candidate final-round key
  ; before each branch (vmovdqa leaves flags untouched). Final key is
  ; [r15+160] if r10d < 11, [r15+192] if r10d < 13, else [r15+224].
  7179. cmp r10d, 11
  7180. vmovdqa xmm7, OWORD PTR [r15+160]
  7181. jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
  7182. vaesenc xmm8, xmm8, xmm7
  7183. vaesenc xmm9, xmm9, xmm7
  7184. vaesenc xmm10, xmm10, xmm7
  7185. vaesenc xmm11, xmm11, xmm7
  7186. vaesenc xmm12, xmm12, xmm7
  7187. vaesenc xmm13, xmm13, xmm7
  7188. vaesenc xmm14, xmm14, xmm7
  7189. vaesenc xmm15, xmm15, xmm7
  7190. vmovdqa xmm7, OWORD PTR [r15+176]
  7191. vaesenc xmm8, xmm8, xmm7
  7192. vaesenc xmm9, xmm9, xmm7
  7193. vaesenc xmm10, xmm10, xmm7
  7194. vaesenc xmm11, xmm11, xmm7
  7195. vaesenc xmm12, xmm12, xmm7
  7196. vaesenc xmm13, xmm13, xmm7
  7197. vaesenc xmm14, xmm14, xmm7
  7198. vaesenc xmm15, xmm15, xmm7
  7199. cmp r10d, 13
  7200. vmovdqa xmm7, OWORD PTR [r15+192]
  7201. jl L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done
  7202. vaesenc xmm8, xmm8, xmm7
  7203. vaesenc xmm9, xmm9, xmm7
  7204. vaesenc xmm10, xmm10, xmm7
  7205. vaesenc xmm11, xmm11, xmm7
  7206. vaesenc xmm12, xmm12, xmm7
  7207. vaesenc xmm13, xmm13, xmm7
  7208. vaesenc xmm14, xmm14, xmm7
  7209. vaesenc xmm15, xmm15, xmm7
  7210. vmovdqa xmm7, OWORD PTR [r15+208]
  7211. vaesenc xmm8, xmm8, xmm7
  7212. vaesenc xmm9, xmm9, xmm7
  7213. vaesenc xmm10, xmm10, xmm7
  7214. vaesenc xmm11, xmm11, xmm7
  7215. vaesenc xmm12, xmm12, xmm7
  7216. vaesenc xmm13, xmm13, xmm7
  7217. vaesenc xmm14, xmm14, xmm7
  7218. vaesenc xmm15, xmm15, xmm7
  7219. vmovdqa xmm7, OWORD PTR [r15+224]
  ; Tail of the 128-byte loop body: final AES round with the key in xmm7,
  ; XOR with 128 bytes of input at [rcx], store 128 bytes of output at [rdx].
  ; xmm8..xmm15 keep the output blocks; they are hashed either by the next
  ; loop iteration (re-read from memory) or by the code after the loop.
  7220. L_AES_GCM_encrypt_avx1_aesenc_128_ghash_avx_done:
  7221. vaesenclast xmm8, xmm8, xmm7
  7222. vaesenclast xmm9, xmm9, xmm7
  7223. vmovdqu xmm0, OWORD PTR [rcx]
  7224. vmovdqu xmm1, OWORD PTR [rcx+16]
  7225. vpxor xmm8, xmm8, xmm0
  7226. vpxor xmm9, xmm9, xmm1
  7227. vmovdqu OWORD PTR [rdx], xmm8
  7228. vmovdqu OWORD PTR [rdx+16], xmm9
  7229. vaesenclast xmm10, xmm10, xmm7
  7230. vaesenclast xmm11, xmm11, xmm7
  7231. vmovdqu xmm0, OWORD PTR [rcx+32]
  7232. vmovdqu xmm1, OWORD PTR [rcx+48]
  7233. vpxor xmm10, xmm10, xmm0
  7234. vpxor xmm11, xmm11, xmm1
  7235. vmovdqu OWORD PTR [rdx+32], xmm10
  7236. vmovdqu OWORD PTR [rdx+48], xmm11
  7237. vaesenclast xmm12, xmm12, xmm7
  7238. vaesenclast xmm13, xmm13, xmm7
  7239. vmovdqu xmm0, OWORD PTR [rcx+64]
  7240. vmovdqu xmm1, OWORD PTR [rcx+80]
  7241. vpxor xmm12, xmm12, xmm0
  7242. vpxor xmm13, xmm13, xmm1
  7243. vmovdqu OWORD PTR [rdx+64], xmm12
  7244. vmovdqu OWORD PTR [rdx+80], xmm13
  7245. vaesenclast xmm14, xmm14, xmm7
  7246. vaesenclast xmm15, xmm15, xmm7
  7247. vmovdqu xmm0, OWORD PTR [rcx+96]
  7248. vmovdqu xmm1, OWORD PTR [rcx+112]
  7249. vpxor xmm14, xmm14, xmm0
  7250. vpxor xmm15, xmm15, xmm1
  7251. vmovdqu OWORD PTR [rdx+96], xmm14
  7252. vmovdqu OWORD PTR [rdx+112], xmm15
  ; Advance the byte offset and loop while ebx < r13d.
  ; NOTE(review): jl is a signed comparison - confirm the length is bounded
  ; below 2^31 by the callers.
  7253. add ebx, 128
  7254. cmp ebx, r13d
  7255. jl L_AES_GCM_encrypt_avx1_ghash_128
  ; GHASH of the last eight output blocks, which are still in xmm8..xmm15
  ; and have not been hashed yet. Each block is byte-shuffled with
  ; L_avx1_aes_gcm_bswap_mask and multiplied by one entry of the table at
  ; [rsp] .. [rsp+112], pairing the newest block (xmm15) with [rsp] and the
  ; oldest (xmm8) with [rsp+112]. The running hash in xmm2 is XORed into the
  ; oldest block first. Products are accumulated unreduced (xmm4 = low 128
  ; bits, xmm6 = high 128 bits) and reduced once at the end.
  ; On exit: xmm6 = updated GHASH value, xmm5 = table entry at [rsp].
  ; Scratch: xmm0-xmm3, xmm7.
  7256. L_AES_GCM_encrypt_avx1_end_128:
  7257. vmovdqa xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7258. vpshufb xmm8, xmm8, xmm4
  7259. vpshufb xmm9, xmm9, xmm4
  7260. vpshufb xmm10, xmm10, xmm4
  7261. vpshufb xmm11, xmm11, xmm4
  ; Fold the running hash into the oldest block; xmm2 is free for reuse
  ; as scratch after this.
  7262. vpxor xmm8, xmm8, xmm2
  7263. vpshufb xmm12, xmm12, xmm4
  7264. vpshufb xmm13, xmm13, xmm4
  7265. vpshufb xmm14, xmm14, xmm4
  7266. vpshufb xmm15, xmm15, xmm4
  7267. vmovdqu xmm7, OWORD PTR [rsp]
  7268. vmovdqu xmm5, OWORD PTR [rsp+16]
  ; xmm15 x [rsp]: first product, so it initialises xmm4/xmm6 by copy
  ; instead of XOR. Karatsuba: xmm3 = hi*hi, xmm0 = lo*lo, xmm1 = middle.
  7269. ; ghash_gfmul_avx
  7270. vpshufd xmm1, xmm15, 78
  7271. vpshufd xmm2, xmm7, 78
  7272. vpclmulqdq xmm3, xmm7, xmm15, 17
  7273. vpclmulqdq xmm0, xmm7, xmm15, 0
  7274. vpxor xmm1, xmm1, xmm15
  7275. vpxor xmm2, xmm2, xmm7
  7276. vpclmulqdq xmm1, xmm1, xmm2, 0
  7277. vpxor xmm1, xmm1, xmm0
  7278. vpxor xmm1, xmm1, xmm3
  7279. vmovdqa xmm4, xmm0
  7280. vmovdqa xmm6, xmm3
  7281. vpslldq xmm2, xmm1, 8
  7282. vpsrldq xmm1, xmm1, 8
  7283. vpxor xmm4, xmm4, xmm2
  7284. vpxor xmm6, xmm6, xmm1
  ; xmm14 x [rsp+16], XOR-accumulated into xmm4/xmm6.
  7285. ; ghash_gfmul_xor_avx
  7286. vpshufd xmm1, xmm14, 78
  7287. vpshufd xmm2, xmm5, 78
  7288. vpclmulqdq xmm3, xmm5, xmm14, 17
  7289. vpclmulqdq xmm0, xmm5, xmm14, 0
  7290. vpxor xmm1, xmm1, xmm14
  7291. vpxor xmm2, xmm2, xmm5
  7292. vpclmulqdq xmm1, xmm1, xmm2, 0
  7293. vpxor xmm1, xmm1, xmm0
  7294. vpxor xmm1, xmm1, xmm3
  7295. vpxor xmm4, xmm4, xmm0
  7296. vpxor xmm6, xmm6, xmm3
  7297. vpslldq xmm2, xmm1, 8
  7298. vpsrldq xmm1, xmm1, 8
  7299. vpxor xmm4, xmm4, xmm2
  7300. vpxor xmm6, xmm6, xmm1
  7301. vmovdqu xmm7, OWORD PTR [rsp+32]
  7302. vmovdqu xmm5, OWORD PTR [rsp+48]
  ; xmm13 x [rsp+32].
  7303. ; ghash_gfmul_xor_avx
  7304. vpshufd xmm1, xmm13, 78
  7305. vpshufd xmm2, xmm7, 78
  7306. vpclmulqdq xmm3, xmm7, xmm13, 17
  7307. vpclmulqdq xmm0, xmm7, xmm13, 0
  7308. vpxor xmm1, xmm1, xmm13
  7309. vpxor xmm2, xmm2, xmm7
  7310. vpclmulqdq xmm1, xmm1, xmm2, 0
  7311. vpxor xmm1, xmm1, xmm0
  7312. vpxor xmm1, xmm1, xmm3
  7313. vpxor xmm4, xmm4, xmm0
  7314. vpxor xmm6, xmm6, xmm3
  7315. vpslldq xmm2, xmm1, 8
  7316. vpsrldq xmm1, xmm1, 8
  7317. vpxor xmm4, xmm4, xmm2
  7318. vpxor xmm6, xmm6, xmm1
  ; xmm12 x [rsp+48].
  7319. ; ghash_gfmul_xor_avx
  7320. vpshufd xmm1, xmm12, 78
  7321. vpshufd xmm2, xmm5, 78
  7322. vpclmulqdq xmm3, xmm5, xmm12, 17
  7323. vpclmulqdq xmm0, xmm5, xmm12, 0
  7324. vpxor xmm1, xmm1, xmm12
  7325. vpxor xmm2, xmm2, xmm5
  7326. vpclmulqdq xmm1, xmm1, xmm2, 0
  7327. vpxor xmm1, xmm1, xmm0
  7328. vpxor xmm1, xmm1, xmm3
  7329. vpxor xmm4, xmm4, xmm0
  7330. vpxor xmm6, xmm6, xmm3
  7331. vpslldq xmm2, xmm1, 8
  7332. vpsrldq xmm1, xmm1, 8
  7333. vpxor xmm4, xmm4, xmm2
  7334. vpxor xmm6, xmm6, xmm1
  7335. vmovdqu xmm7, OWORD PTR [rsp+64]
  7336. vmovdqu xmm5, OWORD PTR [rsp+80]
  ; xmm11 x [rsp+64].
  7337. ; ghash_gfmul_xor_avx
  7338. vpshufd xmm1, xmm11, 78
  7339. vpshufd xmm2, xmm7, 78
  7340. vpclmulqdq xmm3, xmm7, xmm11, 17
  7341. vpclmulqdq xmm0, xmm7, xmm11, 0
  7342. vpxor xmm1, xmm1, xmm11
  7343. vpxor xmm2, xmm2, xmm7
  7344. vpclmulqdq xmm1, xmm1, xmm2, 0
  7345. vpxor xmm1, xmm1, xmm0
  7346. vpxor xmm1, xmm1, xmm3
  7347. vpxor xmm4, xmm4, xmm0
  7348. vpxor xmm6, xmm6, xmm3
  7349. vpslldq xmm2, xmm1, 8
  7350. vpsrldq xmm1, xmm1, 8
  7351. vpxor xmm4, xmm4, xmm2
  7352. vpxor xmm6, xmm6, xmm1
  ; xmm10 x [rsp+80].
  7353. ; ghash_gfmul_xor_avx
  7354. vpshufd xmm1, xmm10, 78
  7355. vpshufd xmm2, xmm5, 78
  7356. vpclmulqdq xmm3, xmm5, xmm10, 17
  7357. vpclmulqdq xmm0, xmm5, xmm10, 0
  7358. vpxor xmm1, xmm1, xmm10
  7359. vpxor xmm2, xmm2, xmm5
  7360. vpclmulqdq xmm1, xmm1, xmm2, 0
  7361. vpxor xmm1, xmm1, xmm0
  7362. vpxor xmm1, xmm1, xmm3
  7363. vpxor xmm4, xmm4, xmm0
  7364. vpxor xmm6, xmm6, xmm3
  7365. vpslldq xmm2, xmm1, 8
  7366. vpsrldq xmm1, xmm1, 8
  7367. vpxor xmm4, xmm4, xmm2
  7368. vpxor xmm6, xmm6, xmm1
  7369. vmovdqu xmm7, OWORD PTR [rsp+96]
  7370. vmovdqu xmm5, OWORD PTR [rsp+112]
  ; xmm9 x [rsp+96].
  7371. ; ghash_gfmul_xor_avx
  7372. vpshufd xmm1, xmm9, 78
  7373. vpshufd xmm2, xmm7, 78
  7374. vpclmulqdq xmm3, xmm7, xmm9, 17
  7375. vpclmulqdq xmm0, xmm7, xmm9, 0
  7376. vpxor xmm1, xmm1, xmm9
  7377. vpxor xmm2, xmm2, xmm7
  7378. vpclmulqdq xmm1, xmm1, xmm2, 0
  7379. vpxor xmm1, xmm1, xmm0
  7380. vpxor xmm1, xmm1, xmm3
  7381. vpxor xmm4, xmm4, xmm0
  7382. vpxor xmm6, xmm6, xmm3
  7383. vpslldq xmm2, xmm1, 8
  7384. vpsrldq xmm1, xmm1, 8
  7385. vpxor xmm4, xmm4, xmm2
  7386. vpxor xmm6, xmm6, xmm1
  ; xmm8 (oldest block, with the running hash folded in) x [rsp+112].
  7387. ; ghash_gfmul_xor_avx
  7388. vpshufd xmm1, xmm8, 78
  7389. vpshufd xmm2, xmm5, 78
  7390. vpclmulqdq xmm3, xmm5, xmm8, 17
  7391. vpclmulqdq xmm0, xmm5, xmm8, 0
  7392. vpxor xmm1, xmm1, xmm8
  7393. vpxor xmm2, xmm2, xmm5
  7394. vpclmulqdq xmm1, xmm1, xmm2, 0
  7395. vpxor xmm1, xmm1, xmm0
  7396. vpxor xmm1, xmm1, xmm3
  7397. vpxor xmm4, xmm4, xmm0
  7398. vpxor xmm6, xmm6, xmm3
  7399. vpslldq xmm2, xmm1, 8
  7400. vpsrldq xmm1, xmm1, 8
  7401. vpxor xmm4, xmm4, xmm2
  7402. vpxor xmm6, xmm6, xmm1
  ; Single reduction of the accumulated 256-bit value: the low half (xmm4)
  ; is folded into the high half (xmm6) with the 31/30/25 left-shift and
  ; 1/2/7 right-shift sequence.
  7403. vpslld xmm0, xmm4, 31
  7404. vpslld xmm1, xmm4, 30
  7405. vpslld xmm2, xmm4, 25
  7406. vpxor xmm0, xmm0, xmm1
  7407. vpxor xmm0, xmm0, xmm2
  7408. vmovdqa xmm1, xmm0
  7409. vpsrldq xmm1, xmm1, 4
  7410. vpslldq xmm0, xmm0, 12
  7411. vpxor xmm4, xmm4, xmm0
  7412. vpsrld xmm2, xmm4, 1
  7413. vpsrld xmm3, xmm4, 2
  7414. vpsrld xmm0, xmm4, 7
  7415. vpxor xmm2, xmm2, xmm3
  7416. vpxor xmm2, xmm2, xmm0
  7417. vpxor xmm2, xmm2, xmm1
  7418. vpxor xmm2, xmm2, xmm4
  7419. vpxor xmm6, xmm6, xmm2
  ; Reload the table entry at [rsp] into xmm5 for the single-block code
  ; that follows (it multiplies xmm6 by xmm5).
  7420. vmovdqu xmm5, OWORD PTR [rsp]
  ; Start of the 16-byte-at-a-time tail.
  ;   r9d  = total byte count, ebx = bytes already processed
  ;   r13d = r9d with the low four bits cleared (4294967280 = 0FFFFFFF0h)
  ; If nothing is left, jump to ..._done_enc; if no whole 16-byte block is
  ; left, jump to ..._last_block_done. Otherwise encrypt ONE block here with
  ; no GHASH multiply; its output is only XORed into xmm6, and the multiply
  ; is performed later (in the loop at ..._last_block_start or at
  ; ..._last_block_ghash).
  7421. L_AES_GCM_encrypt_avx1_done_128:
  7422. mov edx, r9d
  7423. cmp ebx, edx
  7424. jge L_AES_GCM_encrypt_avx1_done_enc
  7425. mov r13d, r9d
  7426. and r13d, 4294967280
  7427. cmp ebx, r13d
  7428. jge L_AES_GCM_encrypt_avx1_last_block_done
  ; xmm8 = shuffled counter block; the counter at [rsp+128] is written back
  ; advanced by L_avx1_aes_gcm_one.
  7429. vmovdqu xmm9, OWORD PTR [rsp+128]
  7430. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  7431. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  7432. vmovdqu OWORD PTR [rsp+128], xmm9
  ; AES: initial key XOR, then nine rounds with memory-operand round keys.
  7433. vpxor xmm8, xmm8, [r15]
  7434. vaesenc xmm8, xmm8, [r15+16]
  7435. vaesenc xmm8, xmm8, [r15+32]
  7436. vaesenc xmm8, xmm8, [r15+48]
  7437. vaesenc xmm8, xmm8, [r15+64]
  7438. vaesenc xmm8, xmm8, [r15+80]
  7439. vaesenc xmm8, xmm8, [r15+96]
  7440. vaesenc xmm8, xmm8, [r15+112]
  7441. vaesenc xmm8, xmm8, [r15+128]
  7442. vaesenc xmm8, xmm8, [r15+144]
  ; Round-count dispatch; xmm9 is loaded with the candidate final-round key
  ; before each branch: [r15+160] if r10d < 11, [r15+192] if r10d < 13,
  ; else [r15+224].
  7443. cmp r10d, 11
  7444. vmovdqa xmm9, OWORD PTR [r15+160]
  7445. jl L_AES_GCM_encrypt_avx1_aesenc_block_last
  7446. vaesenc xmm8, xmm8, xmm9
  7447. vaesenc xmm8, xmm8, [r15+176]
  7448. cmp r10d, 13
  7449. vmovdqa xmm9, OWORD PTR [r15+192]
  7450. jl L_AES_GCM_encrypt_avx1_aesenc_block_last
  7451. vaesenc xmm8, xmm8, xmm9
  7452. vaesenc xmm8, xmm8, [r15+208]
  7453. vmovdqa xmm9, OWORD PTR [r15+224]
  ; Final round, XOR with the input block at [rdi+rbx], store at [rsi+rbx].
  7454. L_AES_GCM_encrypt_avx1_aesenc_block_last:
  7455. vaesenclast xmm8, xmm8, xmm9
  7456. vmovdqu xmm9, OWORD PTR [rdi+rbx]
  7457. vpxor xmm8, xmm8, xmm9
  7458. vmovdqu OWORD PTR [rsi+rbx], xmm8
  ; Byte-shuffle the output block and XOR it into the hash accumulator.
  7459. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7460. vpxor xmm6, xmm6, xmm8
  ; If that was the last whole block, go do the pending multiply; otherwise
  ; fall through into the interleaved single-block loop.
  7461. add ebx, 16
  7462. cmp ebx, r13d
  7463. jge L_AES_GCM_encrypt_avx1_last_block_ghash
  ; Single-block loop: encrypts one 16-byte block per iteration while
  ; performing the GHASH multiply that is still pending for the previous
  ; block (xmm6 = xmm6 * xmm5, reduced), interleaved with the AES rounds.
  ;   xmm13 = input block, xmm8 = AES state, xmm5 = hash multiplier,
  ;   xmm6  = hash accumulator, ebx = byte offset, r13d = end of whole blocks
  7464. L_AES_GCM_encrypt_avx1_last_block_start:
  7465. vmovdqu xmm13, OWORD PTR [rdi+rbx]
  7466. vmovdqu xmm9, OWORD PTR [rsp+128]
  7467. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  7468. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  7469. vmovdqu OWORD PTR [rsp+128], xmm9
  7470. vpxor xmm8, xmm8, [r15]
  ; Four-multiply (schoolbook) product of xmm6 and xmm5:
  ;   imm 16 = lo(xmm6)*hi(xmm5), imm 1 = hi(xmm6)*lo(xmm5),
  ;   imm 0  = lo*lo,             imm 17 = hi*hi.
  7471. vpclmulqdq xmm10, xmm6, xmm5, 16
  7472. vaesenc xmm8, xmm8, [r15+16]
  7473. vaesenc xmm8, xmm8, [r15+32]
  7474. vpclmulqdq xmm11, xmm6, xmm5, 1
  7475. vaesenc xmm8, xmm8, [r15+48]
  7476. vaesenc xmm8, xmm8, [r15+64]
  7477. vpclmulqdq xmm12, xmm6, xmm5, 0
  7478. vaesenc xmm8, xmm8, [r15+80]
  7479. vpclmulqdq xmm1, xmm6, xmm5, 17
  7480. vaesenc xmm8, xmm8, [r15+96]
  ; Combine the two cross products and split them: xmm2 = low 128 bits of
  ; the product, xmm3 = high 128 bits.
  7481. vpxor xmm10, xmm10, xmm11
  7482. vpslldq xmm2, xmm10, 8
  7483. vpsrldq xmm10, xmm10, 8
  7484. vaesenc xmm8, xmm8, [r15+112]
  7485. vpxor xmm2, xmm2, xmm12
  7486. vpxor xmm3, xmm1, xmm10
  ; Reduction by two fold steps: multiply the low qword by the high qword of
  ; the constant L_avx1_aes_gcm_mod2_128 (imm 16), swap the 64-bit halves
  ; (vpshufd 78) and XOR; repeat; then XOR in the high half.
  7487. vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
  7488. vpclmulqdq xmm11, xmm2, xmm0, 16
  7489. vaesenc xmm8, xmm8, [r15+128]
  7490. vpshufd xmm10, xmm2, 78
  7491. vpxor xmm10, xmm10, xmm11
  7492. vpclmulqdq xmm11, xmm10, xmm0, 16
  7493. vaesenc xmm8, xmm8, [r15+144]
  7494. vpshufd xmm10, xmm10, 78
  7495. vpxor xmm10, xmm10, xmm11
  7496. vpxor xmm6, xmm10, xmm3
  ; Round-count dispatch; xmm9 is loaded with the candidate final-round key
  ; before each branch: [r15+160] if r10d < 11, [r15+192] if r10d < 13,
  ; else [r15+224].
  7497. cmp r10d, 11
  7498. vmovdqa xmm9, OWORD PTR [r15+160]
  7499. jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
  7500. vaesenc xmm8, xmm8, xmm9
  7501. vaesenc xmm8, xmm8, [r15+176]
  7502. cmp r10d, 13
  7503. vmovdqa xmm9, OWORD PTR [r15+192]
  7504. jl L_AES_GCM_encrypt_avx1_aesenc_gfmul_last
  7505. vaesenc xmm8, xmm8, xmm9
  7506. vaesenc xmm8, xmm8, [r15+208]
  7507. vmovdqa xmm9, OWORD PTR [r15+224]
  ; Final round, XOR with the input block loaded at the top of the loop,
  ; store at [rsi+rbx].
  7508. L_AES_GCM_encrypt_avx1_aesenc_gfmul_last:
  7509. vaesenclast xmm8, xmm8, xmm9
  7510. vmovdqa xmm0, xmm13
  7511. vpxor xmm8, xmm8, xmm0
  7512. vmovdqu OWORD PTR [rsi+rbx], xmm8
  ; XOR the byte-shuffled output into the accumulator; its multiply is left
  ; pending for the next iteration (or for the code after the loop).
  ; vpxor does not alter flags, so its position between add and cmp is free.
  7513. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7514. add ebx, 16
  7515. vpxor xmm6, xmm6, xmm8
  7516. cmp ebx, r13d
  7517. jl L_AES_GCM_encrypt_avx1_last_block_start
  ; Performs the GHASH multiply still pending for the last whole block:
  ; xmm6 = xmm6 * xmm5, reduced. xmm5 is left unchanged.
  ; Scratch: xmm8-xmm14.
  7518. L_AES_GCM_encrypt_avx1_last_block_ghash:
  7519. ; ghash_gfmul_red_avx
  ; Three-multiply (Karatsuba-style) product: xmm11 = hi*hi, xmm8 = lo*lo,
  ; xmm9 = (hi^lo of xmm5) * (hi^lo of xmm6). vpshufd 78 swaps the halves.
  7520. vpshufd xmm9, xmm5, 78
  7521. vpshufd xmm10, xmm6, 78
  7522. vpclmulqdq xmm11, xmm6, xmm5, 17
  7523. vpclmulqdq xmm8, xmm6, xmm5, 0
  7524. vpxor xmm9, xmm9, xmm5
  7525. vpxor xmm10, xmm10, xmm6
  7526. vpclmulqdq xmm9, xmm9, xmm10, 0
  ; Middle term, split across the low half (xmm8) and the high half (xmm6).
  7527. vpxor xmm9, xmm9, xmm8
  7528. vpxor xmm9, xmm9, xmm11
  7529. vpslldq xmm10, xmm9, 8
  7530. vpsrldq xmm9, xmm9, 8
  7531. vpxor xmm8, xmm8, xmm10
  7532. vpxor xmm6, xmm11, xmm9
  ; Reduction of the low half (xmm8) into the high half (xmm6):
  ; left shifts 31/30/25, byte shifts 4/12, right shifts 1/2/7.
  7533. vpslld xmm12, xmm8, 31
  7534. vpslld xmm13, xmm8, 30
  7535. vpslld xmm14, xmm8, 25
  7536. vpxor xmm12, xmm12, xmm13
  7537. vpxor xmm12, xmm12, xmm14
  7538. vpsrldq xmm13, xmm12, 4
  7539. vpslldq xmm12, xmm12, 12
  7540. vpxor xmm8, xmm8, xmm12
  7541. vpsrld xmm14, xmm8, 1
  7542. vpsrld xmm10, xmm8, 2
  7543. vpsrld xmm9, xmm8, 7
  7544. vpxor xmm14, xmm14, xmm10
  7545. vpxor xmm14, xmm14, xmm9
  7546. vpxor xmm14, xmm14, xmm13
  7547. vpxor xmm14, xmm14, xmm8
  7548. vpxor xmm6, xmm6, xmm14
  ; Handles a trailing partial block of 1..15 bytes.
  ;   ecx = r9d & 15 (bytes in the partial block), edx = r9d (total length)
  ; If the length is a multiple of 16 there is nothing to do and control
  ; jumps to ..._last15_enc_avx_done. Otherwise one keystream block is
  ; produced from the counter at [rsp+128]; the counter is NOT advanced here.
  7549. L_AES_GCM_encrypt_avx1_last_block_done:
  7550. mov ecx, r9d
  7551. mov edx, ecx
  7552. and ecx, 15
  7553. jz L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done
  7554. vmovdqu xmm4, OWORD PTR [rsp+128]
  7555. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  ; AES: initial key XOR, then nine rounds with memory-operand round keys.
  7556. vpxor xmm4, xmm4, [r15]
  7557. vaesenc xmm4, xmm4, [r15+16]
  7558. vaesenc xmm4, xmm4, [r15+32]
  7559. vaesenc xmm4, xmm4, [r15+48]
  7560. vaesenc xmm4, xmm4, [r15+64]
  7561. vaesenc xmm4, xmm4, [r15+80]
  7562. vaesenc xmm4, xmm4, [r15+96]
  7563. vaesenc xmm4, xmm4, [r15+112]
  7564. vaesenc xmm4, xmm4, [r15+128]
  7565. vaesenc xmm4, xmm4, [r15+144]
  ; Round-count dispatch; xmm9 is loaded with the candidate final-round key
  ; before each branch: [r15+160] if r10d < 11, [r15+192] if r10d < 13,
  ; else [r15+224].
  7566. cmp r10d, 11
  7567. vmovdqa xmm9, OWORD PTR [r15+160]
  7568. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
  7569. vaesenc xmm4, xmm4, xmm9
  7570. vaesenc xmm4, xmm4, [r15+176]
  7571. cmp r10d, 13
  7572. vmovdqa xmm9, OWORD PTR [r15+192]
  7573. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last
  7574. vaesenc xmm4, xmm4, xmm9
  7575. vaesenc xmm4, xmm4, [r15+208]
  7576. vmovdqa xmm9, OWORD PTR [r15+224]
  ; Final round, then spill the keystream block to a fresh 16-byte stack
  ; buffer. While rsp is lowered by 16, every other rsp-relative slot is
  ; displaced by 16; rsp is restored at the ..._finish_enc label.
  ; ecx is reset to 0 and becomes the index into that buffer.
  7577. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_aesenc_avx_last:
  7578. vaesenclast xmm4, xmm4, xmm9
  7579. sub rsp, 16
  7580. xor ecx, ecx
  7581. vmovdqu OWORD PTR [rsp], xmm4
  ; Byte loop: out[rbx] = in[rbx] ^ keystream[rcx]. The same byte is also
  ; written back over the keystream, so the buffer ends up holding the
  ; output bytes. Runs until ebx reaches the total length in edx.
  7582. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop:
  7583. movzx r13d, BYTE PTR [rdi+rbx]
  7584. xor r13b, BYTE PTR [rsp+rcx]
  7585. mov BYTE PTR [rsi+rbx], r13b
  7586. mov BYTE PTR [rsp+rcx], r13b
  7587. inc ebx
  7588. inc ecx
  7589. cmp ebx, edx
  7590. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_loop
  ; Zero-fill the remaining bytes of the 16-byte buffer (r13 = 0), so the
  ; block read back from it is the output bytes padded with zeros.
  7591. xor r13, r13
  7592. cmp ecx, 16
  7593. je L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc
  7594. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop:
  7595. mov BYTE PTR [rsp+rcx], r13b
  7596. inc ecx
  7597. cmp ecx, 16
  7598. jl L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_byte_loop
  7599. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_finish_enc:
  7600. vmovdqu xmm4, OWORD PTR [rsp]
  7601. add rsp, 16
  7602. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7603. vpxor xmm6, xmm6, xmm4
  7604. ; ghash_gfmul_red_avx
  7605. vpshufd xmm9, xmm5, 78
  7606. vpshufd xmm10, xmm6, 78
  7607. vpclmulqdq xmm11, xmm6, xmm5, 17
  7608. vpclmulqdq xmm8, xmm6, xmm5, 0
  7609. vpxor xmm9, xmm9, xmm5
  7610. vpxor xmm10, xmm10, xmm6
  7611. vpclmulqdq xmm9, xmm9, xmm10, 0
  7612. vpxor xmm9, xmm9, xmm8
  7613. vpxor xmm9, xmm9, xmm11
  7614. vpslldq xmm10, xmm9, 8
  7615. vpsrldq xmm9, xmm9, 8
  7616. vpxor xmm8, xmm8, xmm10
  7617. vpxor xmm6, xmm11, xmm9
  7618. vpslld xmm12, xmm8, 31
  7619. vpslld xmm13, xmm8, 30
  7620. vpslld xmm14, xmm8, 25
  7621. vpxor xmm12, xmm12, xmm13
  7622. vpxor xmm12, xmm12, xmm14
  7623. vpsrldq xmm13, xmm12, 4
  7624. vpslldq xmm12, xmm12, 12
  7625. vpxor xmm8, xmm8, xmm12
  7626. vpsrld xmm14, xmm8, 1
  7627. vpsrld xmm10, xmm8, 2
  7628. vpsrld xmm9, xmm8, 7
  7629. vpxor xmm14, xmm14, xmm10
  7630. vpxor xmm14, xmm14, xmm9
  7631. vpxor xmm14, xmm14, xmm13
  7632. vpxor xmm14, xmm14, xmm8
  7633. vpxor xmm6, xmm6, xmm14
  7634. L_AES_GCM_encrypt_avx1_aesenc_last15_enc_avx_done:
  7635. L_AES_GCM_encrypt_avx1_done_enc:
  7636. mov edx, r9d
  7637. mov ecx, r11d
  7638. shl rdx, 3
  7639. shl rcx, 3
  7640. vmovq xmm0, rdx
  7641. vmovq xmm1, rcx
  7642. vpunpcklqdq xmm0, xmm0, xmm1
  7643. vpxor xmm6, xmm6, xmm0
  7644. ; ghash_gfmul_red_avx
  7645. vpshufd xmm9, xmm5, 78
  7646. vpshufd xmm10, xmm6, 78
  7647. vpclmulqdq xmm11, xmm6, xmm5, 17
  7648. vpclmulqdq xmm8, xmm6, xmm5, 0
  7649. vpxor xmm9, xmm9, xmm5
  7650. vpxor xmm10, xmm10, xmm6
  7651. vpclmulqdq xmm9, xmm9, xmm10, 0
  7652. vpxor xmm9, xmm9, xmm8
  7653. vpxor xmm9, xmm9, xmm11
  7654. vpslldq xmm10, xmm9, 8
  7655. vpsrldq xmm9, xmm9, 8
  7656. vpxor xmm8, xmm8, xmm10
  7657. vpxor xmm6, xmm11, xmm9
  7658. vpslld xmm12, xmm8, 31
  7659. vpslld xmm13, xmm8, 30
  7660. vpslld xmm14, xmm8, 25
  7661. vpxor xmm12, xmm12, xmm13
  7662. vpxor xmm12, xmm12, xmm14
  7663. vpsrldq xmm13, xmm12, 4
  7664. vpslldq xmm12, xmm12, 12
  7665. vpxor xmm8, xmm8, xmm12
  7666. vpsrld xmm14, xmm8, 1
  7667. vpsrld xmm10, xmm8, 2
  7668. vpsrld xmm9, xmm8, 7
  7669. vpxor xmm14, xmm14, xmm10
  7670. vpxor xmm14, xmm14, xmm9
  7671. vpxor xmm14, xmm14, xmm13
  7672. vpxor xmm14, xmm14, xmm8
  7673. vpxor xmm6, xmm6, xmm14
  7674. vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7675. vmovdqu xmm0, OWORD PTR [rsp+144]
  7676. vpxor xmm0, xmm0, xmm6
  7677. cmp r14d, 16
  7678. je L_AES_GCM_encrypt_avx1_store_tag_16
  7679. xor rcx, rcx
  7680. vmovdqu OWORD PTR [rsp], xmm0
  7681. L_AES_GCM_encrypt_avx1_store_tag_loop:
  7682. movzx r13d, BYTE PTR [rsp+rcx]
  7683. mov BYTE PTR [r8+rcx], r13b
  7684. inc ecx
  7685. cmp ecx, r14d
  7686. jne L_AES_GCM_encrypt_avx1_store_tag_loop
  7687. jmp L_AES_GCM_encrypt_avx1_store_tag_done
  7688. L_AES_GCM_encrypt_avx1_store_tag_16:
  7689. vmovdqu OWORD PTR [r8], xmm0
  7690. L_AES_GCM_encrypt_avx1_store_tag_done:
  7691. vzeroupper
  7692. add rsp, 160
  7693. pop r15
  7694. pop r14
  7695. pop rbx
  7696. pop r12
  7697. pop rsi
  7698. pop rdi
  7699. pop r13
  7700. ret
  7701. AES_GCM_encrypt_avx1 ENDP
  7702. _text ENDS
  7703. _text SEGMENT READONLY PARA
  7704. AES_GCM_decrypt_avx1 PROC
  7705. push r13
  7706. push rdi
  7707. push rsi
  7708. push r12
  7709. push rbx
  7710. push r14
  7711. push r15
  7712. push rbp
  7713. mov rdi, rcx
  7714. mov rsi, rdx
  7715. mov r12, r8
  7716. mov rax, r9
  7717. mov r8, QWORD PTR [rsp+104]
  7718. mov r9d, DWORD PTR [rsp+112]
  7719. mov r11d, DWORD PTR [rsp+120]
  7720. mov ebx, DWORD PTR [rsp+128]
  7721. mov r14d, DWORD PTR [rsp+136]
  7722. mov r15, QWORD PTR [rsp+144]
  7723. mov r10d, DWORD PTR [rsp+152]
  7724. mov rbp, QWORD PTR [rsp+160]
  7725. sub rsp, 168
  7726. vpxor xmm4, xmm4, xmm4
  7727. vpxor xmm6, xmm6, xmm6
  7728. cmp ebx, 12
  7729. mov edx, ebx
  7730. jne L_AES_GCM_decrypt_avx1_iv_not_12
  7731. ; Calculate values when IV is 12 bytes
  7732. ; Set counter based on IV
  7733. mov ecx, 16777216
  7734. vmovq xmm4, QWORD PTR [rax]
  7735. vpinsrd xmm4, xmm4, DWORD PTR [rax+8], 2
  7736. vpinsrd xmm4, xmm4, ecx, 3
  7737. ; H = Encrypt X(=0) and T = Encrypt counter
  7738. vmovdqa xmm5, OWORD PTR [r15]
  7739. vpxor xmm1, xmm4, xmm5
  7740. vmovdqa xmm7, OWORD PTR [r15+16]
  7741. vaesenc xmm5, xmm5, xmm7
  7742. vaesenc xmm1, xmm1, xmm7
  7743. vmovdqa xmm7, OWORD PTR [r15+32]
  7744. vaesenc xmm5, xmm5, xmm7
  7745. vaesenc xmm1, xmm1, xmm7
  7746. vmovdqa xmm7, OWORD PTR [r15+48]
  7747. vaesenc xmm5, xmm5, xmm7
  7748. vaesenc xmm1, xmm1, xmm7
  7749. vmovdqa xmm7, OWORD PTR [r15+64]
  7750. vaesenc xmm5, xmm5, xmm7
  7751. vaesenc xmm1, xmm1, xmm7
  7752. vmovdqa xmm7, OWORD PTR [r15+80]
  7753. vaesenc xmm5, xmm5, xmm7
  7754. vaesenc xmm1, xmm1, xmm7
  7755. vmovdqa xmm7, OWORD PTR [r15+96]
  7756. vaesenc xmm5, xmm5, xmm7
  7757. vaesenc xmm1, xmm1, xmm7
  7758. vmovdqa xmm7, OWORD PTR [r15+112]
  7759. vaesenc xmm5, xmm5, xmm7
  7760. vaesenc xmm1, xmm1, xmm7
  7761. vmovdqa xmm7, OWORD PTR [r15+128]
  7762. vaesenc xmm5, xmm5, xmm7
  7763. vaesenc xmm1, xmm1, xmm7
  7764. vmovdqa xmm7, OWORD PTR [r15+144]
  7765. vaesenc xmm5, xmm5, xmm7
  7766. vaesenc xmm1, xmm1, xmm7
  7767. cmp r10d, 11
  7768. vmovdqa xmm7, OWORD PTR [r15+160]
  7769. jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
  7770. vaesenc xmm5, xmm5, xmm7
  7771. vaesenc xmm1, xmm1, xmm7
  7772. vmovdqa xmm7, OWORD PTR [r15+176]
  7773. vaesenc xmm5, xmm5, xmm7
  7774. vaesenc xmm1, xmm1, xmm7
  7775. cmp r10d, 13
  7776. vmovdqa xmm7, OWORD PTR [r15+192]
  7777. jl L_AES_GCM_decrypt_avx1_calc_iv_12_last
  7778. vaesenc xmm5, xmm5, xmm7
  7779. vaesenc xmm1, xmm1, xmm7
  7780. vmovdqa xmm7, OWORD PTR [r15+208]
  7781. vaesenc xmm5, xmm5, xmm7
  7782. vaesenc xmm1, xmm1, xmm7
  7783. vmovdqa xmm7, OWORD PTR [r15+224]
  7784. L_AES_GCM_decrypt_avx1_calc_iv_12_last:
  7785. vaesenclast xmm5, xmm5, xmm7
  7786. vaesenclast xmm1, xmm1, xmm7
  7787. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7788. vmovdqu OWORD PTR [rsp+144], xmm1
  7789. jmp L_AES_GCM_decrypt_avx1_iv_done
  7790. L_AES_GCM_decrypt_avx1_iv_not_12:
  7791. ; Calculate values when IV is not 12 bytes
  7792. ; H = Encrypt X(=0)
  7793. vmovdqa xmm5, OWORD PTR [r15]
  7794. vaesenc xmm5, xmm5, [r15+16]
  7795. vaesenc xmm5, xmm5, [r15+32]
  7796. vaesenc xmm5, xmm5, [r15+48]
  7797. vaesenc xmm5, xmm5, [r15+64]
  7798. vaesenc xmm5, xmm5, [r15+80]
  7799. vaesenc xmm5, xmm5, [r15+96]
  7800. vaesenc xmm5, xmm5, [r15+112]
  7801. vaesenc xmm5, xmm5, [r15+128]
  7802. vaesenc xmm5, xmm5, [r15+144]
  7803. cmp r10d, 11
  7804. vmovdqa xmm9, OWORD PTR [r15+160]
  7805. jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
  7806. vaesenc xmm5, xmm5, xmm9
  7807. vaesenc xmm5, xmm5, [r15+176]
  7808. cmp r10d, 13
  7809. vmovdqa xmm9, OWORD PTR [r15+192]
  7810. jl L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last
  7811. vaesenc xmm5, xmm5, xmm9
  7812. vaesenc xmm5, xmm5, [r15+208]
  7813. vmovdqa xmm9, OWORD PTR [r15+224]
  7814. L_AES_GCM_decrypt_avx1_calc_iv_1_aesenc_avx_last:
  7815. vaesenclast xmm5, xmm5, xmm9
  7816. vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7817. ; Calc counter
  7818. ; Initialization vector
  7819. cmp edx, 0
  7820. mov rcx, 0
  7821. je L_AES_GCM_decrypt_avx1_calc_iv_done
  7822. cmp edx, 16
  7823. jl L_AES_GCM_decrypt_avx1_calc_iv_lt16
  7824. and edx, 4294967280
  7825. L_AES_GCM_decrypt_avx1_calc_iv_16_loop:
  7826. vmovdqu xmm8, OWORD PTR [rax+rcx]
  7827. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7828. vpxor xmm4, xmm4, xmm8
  7829. ; ghash_gfmul_avx
  7830. vpshufd xmm1, xmm4, 78
  7831. vpshufd xmm2, xmm5, 78
  7832. vpclmulqdq xmm3, xmm5, xmm4, 17
  7833. vpclmulqdq xmm0, xmm5, xmm4, 0
  7834. vpxor xmm1, xmm1, xmm4
  7835. vpxor xmm2, xmm2, xmm5
  7836. vpclmulqdq xmm1, xmm1, xmm2, 0
  7837. vpxor xmm1, xmm1, xmm0
  7838. vpxor xmm1, xmm1, xmm3
  7839. vmovdqa xmm7, xmm0
  7840. vmovdqa xmm4, xmm3
  7841. vpslldq xmm2, xmm1, 8
  7842. vpsrldq xmm1, xmm1, 8
  7843. vpxor xmm7, xmm7, xmm2
  7844. vpxor xmm4, xmm4, xmm1
  7845. vpsrld xmm0, xmm7, 31
  7846. vpsrld xmm1, xmm4, 31
  7847. vpslld xmm7, xmm7, 1
  7848. vpslld xmm4, xmm4, 1
  7849. vpsrldq xmm2, xmm0, 12
  7850. vpslldq xmm0, xmm0, 4
  7851. vpslldq xmm1, xmm1, 4
  7852. vpor xmm4, xmm4, xmm2
  7853. vpor xmm7, xmm7, xmm0
  7854. vpor xmm4, xmm4, xmm1
  7855. vpslld xmm0, xmm7, 31
  7856. vpslld xmm1, xmm7, 30
  7857. vpslld xmm2, xmm7, 25
  7858. vpxor xmm0, xmm0, xmm1
  7859. vpxor xmm0, xmm0, xmm2
  7860. vmovdqa xmm1, xmm0
  7861. vpsrldq xmm1, xmm1, 4
  7862. vpslldq xmm0, xmm0, 12
  7863. vpxor xmm7, xmm7, xmm0
  7864. vpsrld xmm2, xmm7, 1
  7865. vpsrld xmm3, xmm7, 2
  7866. vpsrld xmm0, xmm7, 7
  7867. vpxor xmm2, xmm2, xmm3
  7868. vpxor xmm2, xmm2, xmm0
  7869. vpxor xmm2, xmm2, xmm1
  7870. vpxor xmm2, xmm2, xmm7
  7871. vpxor xmm4, xmm4, xmm2
  7872. add ecx, 16
  7873. cmp ecx, edx
  7874. jl L_AES_GCM_decrypt_avx1_calc_iv_16_loop
  7875. mov edx, ebx
  7876. cmp ecx, edx
  7877. je L_AES_GCM_decrypt_avx1_calc_iv_done
  7878. L_AES_GCM_decrypt_avx1_calc_iv_lt16:
  7879. sub rsp, 16
  7880. vpxor xmm8, xmm8, xmm8
  7881. xor ebx, ebx
  7882. vmovdqu OWORD PTR [rsp], xmm8
  7883. L_AES_GCM_decrypt_avx1_calc_iv_loop:
  7884. movzx r13d, BYTE PTR [rax+rcx]
  7885. mov BYTE PTR [rsp+rbx], r13b
  7886. inc ecx
  7887. inc ebx
  7888. cmp ecx, edx
  7889. jl L_AES_GCM_decrypt_avx1_calc_iv_loop
  7890. vmovdqu xmm8, OWORD PTR [rsp]
  7891. add rsp, 16
  7892. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7893. vpxor xmm4, xmm4, xmm8
  7894. ; ghash_gfmul_avx
  7895. vpshufd xmm1, xmm4, 78
  7896. vpshufd xmm2, xmm5, 78
  7897. vpclmulqdq xmm3, xmm5, xmm4, 17
  7898. vpclmulqdq xmm0, xmm5, xmm4, 0
  7899. vpxor xmm1, xmm1, xmm4
  7900. vpxor xmm2, xmm2, xmm5
  7901. vpclmulqdq xmm1, xmm1, xmm2, 0
  7902. vpxor xmm1, xmm1, xmm0
  7903. vpxor xmm1, xmm1, xmm3
  7904. vmovdqa xmm7, xmm0
  7905. vmovdqa xmm4, xmm3
  7906. vpslldq xmm2, xmm1, 8
  7907. vpsrldq xmm1, xmm1, 8
  7908. vpxor xmm7, xmm7, xmm2
  7909. vpxor xmm4, xmm4, xmm1
  7910. vpsrld xmm0, xmm7, 31
  7911. vpsrld xmm1, xmm4, 31
  7912. vpslld xmm7, xmm7, 1
  7913. vpslld xmm4, xmm4, 1
  7914. vpsrldq xmm2, xmm0, 12
  7915. vpslldq xmm0, xmm0, 4
  7916. vpslldq xmm1, xmm1, 4
  7917. vpor xmm4, xmm4, xmm2
  7918. vpor xmm7, xmm7, xmm0
  7919. vpor xmm4, xmm4, xmm1
  7920. vpslld xmm0, xmm7, 31
  7921. vpslld xmm1, xmm7, 30
  7922. vpslld xmm2, xmm7, 25
  7923. vpxor xmm0, xmm0, xmm1
  7924. vpxor xmm0, xmm0, xmm2
  7925. vmovdqa xmm1, xmm0
  7926. vpsrldq xmm1, xmm1, 4
  7927. vpslldq xmm0, xmm0, 12
  7928. vpxor xmm7, xmm7, xmm0
  7929. vpsrld xmm2, xmm7, 1
  7930. vpsrld xmm3, xmm7, 2
  7931. vpsrld xmm0, xmm7, 7
  7932. vpxor xmm2, xmm2, xmm3
  7933. vpxor xmm2, xmm2, xmm0
  7934. vpxor xmm2, xmm2, xmm1
  7935. vpxor xmm2, xmm2, xmm7
  7936. vpxor xmm4, xmm4, xmm2
  7937. L_AES_GCM_decrypt_avx1_calc_iv_done:
  7938. ; T = Encrypt counter
  7939. vpxor xmm0, xmm0, xmm0
  7940. shl edx, 3
  7941. vmovq xmm0, rdx
  7942. vpxor xmm4, xmm4, xmm0
  7943. ; ghash_gfmul_avx
  7944. vpshufd xmm1, xmm4, 78
  7945. vpshufd xmm2, xmm5, 78
  7946. vpclmulqdq xmm3, xmm5, xmm4, 17
  7947. vpclmulqdq xmm0, xmm5, xmm4, 0
  7948. vpxor xmm1, xmm1, xmm4
  7949. vpxor xmm2, xmm2, xmm5
  7950. vpclmulqdq xmm1, xmm1, xmm2, 0
  7951. vpxor xmm1, xmm1, xmm0
  7952. vpxor xmm1, xmm1, xmm3
  7953. vmovdqa xmm7, xmm0
  7954. vmovdqa xmm4, xmm3
  7955. vpslldq xmm2, xmm1, 8
  7956. vpsrldq xmm1, xmm1, 8
  7957. vpxor xmm7, xmm7, xmm2
  7958. vpxor xmm4, xmm4, xmm1
  7959. vpsrld xmm0, xmm7, 31
  7960. vpsrld xmm1, xmm4, 31
  7961. vpslld xmm7, xmm7, 1
  7962. vpslld xmm4, xmm4, 1
  7963. vpsrldq xmm2, xmm0, 12
  7964. vpslldq xmm0, xmm0, 4
  7965. vpslldq xmm1, xmm1, 4
  7966. vpor xmm4, xmm4, xmm2
  7967. vpor xmm7, xmm7, xmm0
  7968. vpor xmm4, xmm4, xmm1
  7969. vpslld xmm0, xmm7, 31
  7970. vpslld xmm1, xmm7, 30
  7971. vpslld xmm2, xmm7, 25
  7972. vpxor xmm0, xmm0, xmm1
  7973. vpxor xmm0, xmm0, xmm2
  7974. vmovdqa xmm1, xmm0
  7975. vpsrldq xmm1, xmm1, 4
  7976. vpslldq xmm0, xmm0, 12
  7977. vpxor xmm7, xmm7, xmm0
  7978. vpsrld xmm2, xmm7, 1
  7979. vpsrld xmm3, xmm7, 2
  7980. vpsrld xmm0, xmm7, 7
  7981. vpxor xmm2, xmm2, xmm3
  7982. vpxor xmm2, xmm2, xmm0
  7983. vpxor xmm2, xmm2, xmm1
  7984. vpxor xmm2, xmm2, xmm7
  7985. vpxor xmm4, xmm4, xmm2
  7986. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  7987. ; Encrypt counter
  7988. vmovdqa xmm8, OWORD PTR [r15]
  7989. vpxor xmm8, xmm8, xmm4
  7990. vaesenc xmm8, xmm8, [r15+16]
  7991. vaesenc xmm8, xmm8, [r15+32]
  7992. vaesenc xmm8, xmm8, [r15+48]
  7993. vaesenc xmm8, xmm8, [r15+64]
  7994. vaesenc xmm8, xmm8, [r15+80]
  7995. vaesenc xmm8, xmm8, [r15+96]
  7996. vaesenc xmm8, xmm8, [r15+112]
  7997. vaesenc xmm8, xmm8, [r15+128]
  7998. vaesenc xmm8, xmm8, [r15+144]
  7999. cmp r10d, 11
  8000. vmovdqa xmm9, OWORD PTR [r15+160]
  8001. jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
  8002. vaesenc xmm8, xmm8, xmm9
  8003. vaesenc xmm8, xmm8, [r15+176]
  8004. cmp r10d, 13
  8005. vmovdqa xmm9, OWORD PTR [r15+192]
  8006. jl L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last
  8007. vaesenc xmm8, xmm8, xmm9
  8008. vaesenc xmm8, xmm8, [r15+208]
  8009. vmovdqa xmm9, OWORD PTR [r15+224]
  8010. L_AES_GCM_decrypt_avx1_calc_iv_2_aesenc_avx_last:
  8011. vaesenclast xmm8, xmm8, xmm9
  8012. vmovdqu OWORD PTR [rsp+144], xmm8
  8013. L_AES_GCM_decrypt_avx1_iv_done:
  8014. ; Additional authentication data
  8015. mov edx, r11d
  8016. cmp edx, 0
  8017. je L_AES_GCM_decrypt_avx1_calc_aad_done
  8018. xor ecx, ecx
  8019. cmp edx, 16
  8020. jl L_AES_GCM_decrypt_avx1_calc_aad_lt16
  8021. and edx, 4294967280
  8022. L_AES_GCM_decrypt_avx1_calc_aad_16_loop:
  8023. vmovdqu xmm8, OWORD PTR [r12+rcx]
  8024. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8025. vpxor xmm6, xmm6, xmm8
  8026. ; ghash_gfmul_avx
  8027. vpshufd xmm1, xmm6, 78
  8028. vpshufd xmm2, xmm5, 78
  8029. vpclmulqdq xmm3, xmm5, xmm6, 17
  8030. vpclmulqdq xmm0, xmm5, xmm6, 0
  8031. vpxor xmm1, xmm1, xmm6
  8032. vpxor xmm2, xmm2, xmm5
  8033. vpclmulqdq xmm1, xmm1, xmm2, 0
  8034. vpxor xmm1, xmm1, xmm0
  8035. vpxor xmm1, xmm1, xmm3
  8036. vmovdqa xmm7, xmm0
  8037. vmovdqa xmm6, xmm3
  8038. vpslldq xmm2, xmm1, 8
  8039. vpsrldq xmm1, xmm1, 8
  8040. vpxor xmm7, xmm7, xmm2
  8041. vpxor xmm6, xmm6, xmm1
  8042. vpsrld xmm0, xmm7, 31
  8043. vpsrld xmm1, xmm6, 31
  8044. vpslld xmm7, xmm7, 1
  8045. vpslld xmm6, xmm6, 1
  8046. vpsrldq xmm2, xmm0, 12
  8047. vpslldq xmm0, xmm0, 4
  8048. vpslldq xmm1, xmm1, 4
  8049. vpor xmm6, xmm6, xmm2
  8050. vpor xmm7, xmm7, xmm0
  8051. vpor xmm6, xmm6, xmm1
  8052. vpslld xmm0, xmm7, 31
  8053. vpslld xmm1, xmm7, 30
  8054. vpslld xmm2, xmm7, 25
  8055. vpxor xmm0, xmm0, xmm1
  8056. vpxor xmm0, xmm0, xmm2
  8057. vmovdqa xmm1, xmm0
  8058. vpsrldq xmm1, xmm1, 4
  8059. vpslldq xmm0, xmm0, 12
  8060. vpxor xmm7, xmm7, xmm0
  8061. vpsrld xmm2, xmm7, 1
  8062. vpsrld xmm3, xmm7, 2
  8063. vpsrld xmm0, xmm7, 7
  8064. vpxor xmm2, xmm2, xmm3
  8065. vpxor xmm2, xmm2, xmm0
  8066. vpxor xmm2, xmm2, xmm1
  8067. vpxor xmm2, xmm2, xmm7
  8068. vpxor xmm6, xmm6, xmm2
  8069. add ecx, 16
  8070. cmp ecx, edx
  8071. jl L_AES_GCM_decrypt_avx1_calc_aad_16_loop
  8072. mov edx, r11d
  8073. cmp ecx, edx
  8074. je L_AES_GCM_decrypt_avx1_calc_aad_done
  8075. L_AES_GCM_decrypt_avx1_calc_aad_lt16:
  8076. sub rsp, 16
  8077. vpxor xmm8, xmm8, xmm8
  8078. xor ebx, ebx
  8079. vmovdqu OWORD PTR [rsp], xmm8
  8080. L_AES_GCM_decrypt_avx1_calc_aad_loop:
  8081. movzx r13d, BYTE PTR [r12+rcx]
  8082. mov BYTE PTR [rsp+rbx], r13b
  8083. inc ecx
  8084. inc ebx
  8085. cmp ecx, edx
  8086. jl L_AES_GCM_decrypt_avx1_calc_aad_loop
  8087. vmovdqu xmm8, OWORD PTR [rsp]
  8088. add rsp, 16
  8089. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8090. vpxor xmm6, xmm6, xmm8
  8091. ; ghash_gfmul_avx
  8092. vpshufd xmm1, xmm6, 78
  8093. vpshufd xmm2, xmm5, 78
  8094. vpclmulqdq xmm3, xmm5, xmm6, 17
  8095. vpclmulqdq xmm0, xmm5, xmm6, 0
  8096. vpxor xmm1, xmm1, xmm6
  8097. vpxor xmm2, xmm2, xmm5
  8098. vpclmulqdq xmm1, xmm1, xmm2, 0
  8099. vpxor xmm1, xmm1, xmm0
  8100. vpxor xmm1, xmm1, xmm3
  8101. vmovdqa xmm7, xmm0
  8102. vmovdqa xmm6, xmm3
  8103. vpslldq xmm2, xmm1, 8
  8104. vpsrldq xmm1, xmm1, 8
  8105. vpxor xmm7, xmm7, xmm2
  8106. vpxor xmm6, xmm6, xmm1
  8107. vpsrld xmm0, xmm7, 31
  8108. vpsrld xmm1, xmm6, 31
  8109. vpslld xmm7, xmm7, 1
  8110. vpslld xmm6, xmm6, 1
  8111. vpsrldq xmm2, xmm0, 12
  8112. vpslldq xmm0, xmm0, 4
  8113. vpslldq xmm1, xmm1, 4
  8114. vpor xmm6, xmm6, xmm2
  8115. vpor xmm7, xmm7, xmm0
  8116. vpor xmm6, xmm6, xmm1
  8117. vpslld xmm0, xmm7, 31
  8118. vpslld xmm1, xmm7, 30
  8119. vpslld xmm2, xmm7, 25
  8120. vpxor xmm0, xmm0, xmm1
  8121. vpxor xmm0, xmm0, xmm2
  8122. vmovdqa xmm1, xmm0
  8123. vpsrldq xmm1, xmm1, 4
  8124. vpslldq xmm0, xmm0, 12
  8125. vpxor xmm7, xmm7, xmm0
  8126. vpsrld xmm2, xmm7, 1
  8127. vpsrld xmm3, xmm7, 2
  8128. vpsrld xmm0, xmm7, 7
  8129. vpxor xmm2, xmm2, xmm3
  8130. vpxor xmm2, xmm2, xmm0
  8131. vpxor xmm2, xmm2, xmm1
  8132. vpxor xmm2, xmm2, xmm7
  8133. vpxor xmm6, xmm6, xmm2
  8134. L_AES_GCM_decrypt_avx1_calc_aad_done:
  8135. ; Calculate counter and H
  8136. vpsrlq xmm9, xmm5, 63
  8137. vpsllq xmm8, xmm5, 1
  8138. vpslldq xmm9, xmm9, 8
  8139. vpor xmm8, xmm8, xmm9
  8140. vpshufd xmm5, xmm5, 255
  8141. vpsrad xmm5, xmm5, 31
  8142. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8143. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  8144. vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
  8145. vpxor xmm5, xmm5, xmm8
  8146. vmovdqu OWORD PTR [rsp+128], xmm4
  8147. xor ebx, ebx
  8148. cmp r9d, 128
  8149. mov r13d, r9d
  8150. jl L_AES_GCM_decrypt_avx1_done_128
  8151. and r13d, 4294967168
  8152. vmovdqa xmm2, xmm6
  8153. ; H ^ 1
  8154. vmovdqu OWORD PTR [rsp], xmm5
  8155. ; H ^ 2
  8156. vpclmulqdq xmm8, xmm5, xmm5, 0
  8157. vpclmulqdq xmm0, xmm5, xmm5, 17
  8158. vpslld xmm12, xmm8, 31
  8159. vpslld xmm13, xmm8, 30
  8160. vpslld xmm14, xmm8, 25
  8161. vpxor xmm12, xmm12, xmm13
  8162. vpxor xmm12, xmm12, xmm14
  8163. vpsrldq xmm13, xmm12, 4
  8164. vpslldq xmm12, xmm12, 12
  8165. vpxor xmm8, xmm8, xmm12
  8166. vpsrld xmm14, xmm8, 1
  8167. vpsrld xmm10, xmm8, 2
  8168. vpsrld xmm9, xmm8, 7
  8169. vpxor xmm14, xmm14, xmm10
  8170. vpxor xmm14, xmm14, xmm9
  8171. vpxor xmm14, xmm14, xmm13
  8172. vpxor xmm14, xmm14, xmm8
  8173. vpxor xmm0, xmm0, xmm14
  8174. vmovdqu OWORD PTR [rsp+16], xmm0
  8175. ; H ^ 3
  8176. ; ghash_gfmul_red_avx
  8177. vpshufd xmm9, xmm5, 78
  8178. vpshufd xmm10, xmm0, 78
  8179. vpclmulqdq xmm11, xmm0, xmm5, 17
  8180. vpclmulqdq xmm8, xmm0, xmm5, 0
  8181. vpxor xmm9, xmm9, xmm5
  8182. vpxor xmm10, xmm10, xmm0
  8183. vpclmulqdq xmm9, xmm9, xmm10, 0
  8184. vpxor xmm9, xmm9, xmm8
  8185. vpxor xmm9, xmm9, xmm11
  8186. vpslldq xmm10, xmm9, 8
  8187. vpsrldq xmm9, xmm9, 8
  8188. vpxor xmm8, xmm8, xmm10
  8189. vpxor xmm1, xmm11, xmm9
  8190. vpslld xmm12, xmm8, 31
  8191. vpslld xmm13, xmm8, 30
  8192. vpslld xmm14, xmm8, 25
  8193. vpxor xmm12, xmm12, xmm13
  8194. vpxor xmm12, xmm12, xmm14
  8195. vpsrldq xmm13, xmm12, 4
  8196. vpslldq xmm12, xmm12, 12
  8197. vpxor xmm8, xmm8, xmm12
  8198. vpsrld xmm14, xmm8, 1
  8199. vpsrld xmm10, xmm8, 2
  8200. vpsrld xmm9, xmm8, 7
  8201. vpxor xmm14, xmm14, xmm10
  8202. vpxor xmm14, xmm14, xmm9
  8203. vpxor xmm14, xmm14, xmm13
  8204. vpxor xmm14, xmm14, xmm8
  8205. vpxor xmm1, xmm1, xmm14
  8206. vmovdqu OWORD PTR [rsp+32], xmm1
  8207. ; H ^ 4
  8208. vpclmulqdq xmm8, xmm0, xmm0, 0
  8209. vpclmulqdq xmm3, xmm0, xmm0, 17
  8210. vpslld xmm12, xmm8, 31
  8211. vpslld xmm13, xmm8, 30
  8212. vpslld xmm14, xmm8, 25
  8213. vpxor xmm12, xmm12, xmm13
  8214. vpxor xmm12, xmm12, xmm14
  8215. vpsrldq xmm13, xmm12, 4
  8216. vpslldq xmm12, xmm12, 12
  8217. vpxor xmm8, xmm8, xmm12
  8218. vpsrld xmm14, xmm8, 1
  8219. vpsrld xmm10, xmm8, 2
  8220. vpsrld xmm9, xmm8, 7
  8221. vpxor xmm14, xmm14, xmm10
  8222. vpxor xmm14, xmm14, xmm9
  8223. vpxor xmm14, xmm14, xmm13
  8224. vpxor xmm14, xmm14, xmm8
  8225. vpxor xmm3, xmm3, xmm14
  8226. vmovdqu OWORD PTR [rsp+48], xmm3
  8227. ; H ^ 5
  8228. ; ghash_gfmul_red_avx
  8229. vpshufd xmm9, xmm0, 78
  8230. vpshufd xmm10, xmm1, 78
  8231. vpclmulqdq xmm11, xmm1, xmm0, 17
  8232. vpclmulqdq xmm8, xmm1, xmm0, 0
  8233. vpxor xmm9, xmm9, xmm0
  8234. vpxor xmm10, xmm10, xmm1
  8235. vpclmulqdq xmm9, xmm9, xmm10, 0
  8236. vpxor xmm9, xmm9, xmm8
  8237. vpxor xmm9, xmm9, xmm11
  8238. vpslldq xmm10, xmm9, 8
  8239. vpsrldq xmm9, xmm9, 8
  8240. vpxor xmm8, xmm8, xmm10
  8241. vpxor xmm7, xmm11, xmm9
  8242. vpslld xmm12, xmm8, 31
  8243. vpslld xmm13, xmm8, 30
  8244. vpslld xmm14, xmm8, 25
  8245. vpxor xmm12, xmm12, xmm13
  8246. vpxor xmm12, xmm12, xmm14
  8247. vpsrldq xmm13, xmm12, 4
  8248. vpslldq xmm12, xmm12, 12
  8249. vpxor xmm8, xmm8, xmm12
  8250. vpsrld xmm14, xmm8, 1
  8251. vpsrld xmm10, xmm8, 2
  8252. vpsrld xmm9, xmm8, 7
  8253. vpxor xmm14, xmm14, xmm10
  8254. vpxor xmm14, xmm14, xmm9
  8255. vpxor xmm14, xmm14, xmm13
  8256. vpxor xmm14, xmm14, xmm8
  8257. vpxor xmm7, xmm7, xmm14
  8258. vmovdqu OWORD PTR [rsp+64], xmm7
  8259. ; H ^ 6
  8260. vpclmulqdq xmm8, xmm1, xmm1, 0
  8261. vpclmulqdq xmm7, xmm1, xmm1, 17
  8262. vpslld xmm12, xmm8, 31
  8263. vpslld xmm13, xmm8, 30
  8264. vpslld xmm14, xmm8, 25
  8265. vpxor xmm12, xmm12, xmm13
  8266. vpxor xmm12, xmm12, xmm14
  8267. vpsrldq xmm13, xmm12, 4
  8268. vpslldq xmm12, xmm12, 12
  8269. vpxor xmm8, xmm8, xmm12
  8270. vpsrld xmm14, xmm8, 1
  8271. vpsrld xmm10, xmm8, 2
  8272. vpsrld xmm9, xmm8, 7
  8273. vpxor xmm14, xmm14, xmm10
  8274. vpxor xmm14, xmm14, xmm9
  8275. vpxor xmm14, xmm14, xmm13
  8276. vpxor xmm14, xmm14, xmm8
  8277. vpxor xmm7, xmm7, xmm14
  8278. vmovdqu OWORD PTR [rsp+80], xmm7
  8279. ; H ^ 7
  8280. ; ghash_gfmul_red_avx
  8281. vpshufd xmm9, xmm1, 78
  8282. vpshufd xmm10, xmm3, 78
  8283. vpclmulqdq xmm11, xmm3, xmm1, 17
  8284. vpclmulqdq xmm8, xmm3, xmm1, 0
  8285. vpxor xmm9, xmm9, xmm1
  8286. vpxor xmm10, xmm10, xmm3
  8287. vpclmulqdq xmm9, xmm9, xmm10, 0
  8288. vpxor xmm9, xmm9, xmm8
  8289. vpxor xmm9, xmm9, xmm11
  8290. vpslldq xmm10, xmm9, 8
  8291. vpsrldq xmm9, xmm9, 8
  8292. vpxor xmm8, xmm8, xmm10
  8293. vpxor xmm7, xmm11, xmm9
  8294. vpslld xmm12, xmm8, 31
  8295. vpslld xmm13, xmm8, 30
  8296. vpslld xmm14, xmm8, 25
  8297. vpxor xmm12, xmm12, xmm13
  8298. vpxor xmm12, xmm12, xmm14
  8299. vpsrldq xmm13, xmm12, 4
  8300. vpslldq xmm12, xmm12, 12
  8301. vpxor xmm8, xmm8, xmm12
  8302. vpsrld xmm14, xmm8, 1
  8303. vpsrld xmm10, xmm8, 2
  8304. vpsrld xmm9, xmm8, 7
  8305. vpxor xmm14, xmm14, xmm10
  8306. vpxor xmm14, xmm14, xmm9
  8307. vpxor xmm14, xmm14, xmm13
  8308. vpxor xmm14, xmm14, xmm8
  8309. vpxor xmm7, xmm7, xmm14
  8310. vmovdqu OWORD PTR [rsp+96], xmm7
  8311. ; H ^ 8
  8312. vpclmulqdq xmm8, xmm3, xmm3, 0
  8313. vpclmulqdq xmm7, xmm3, xmm3, 17
  8314. vpslld xmm12, xmm8, 31
  8315. vpslld xmm13, xmm8, 30
  8316. vpslld xmm14, xmm8, 25
  8317. vpxor xmm12, xmm12, xmm13
  8318. vpxor xmm12, xmm12, xmm14
  8319. vpsrldq xmm13, xmm12, 4
  8320. vpslldq xmm12, xmm12, 12
  8321. vpxor xmm8, xmm8, xmm12
  8322. vpsrld xmm14, xmm8, 1
  8323. vpsrld xmm10, xmm8, 2
  8324. vpsrld xmm9, xmm8, 7
  8325. vpxor xmm14, xmm14, xmm10
  8326. vpxor xmm14, xmm14, xmm9
  8327. vpxor xmm14, xmm14, xmm13
  8328. vpxor xmm14, xmm14, xmm8
  8329. vpxor xmm7, xmm7, xmm14
  8330. vmovdqu OWORD PTR [rsp+112], xmm7
  8331. L_AES_GCM_decrypt_avx1_ghash_128:
  8332. lea rcx, QWORD PTR [rdi+rbx]
  8333. lea rdx, QWORD PTR [rsi+rbx]
  8334. vmovdqu xmm0, OWORD PTR [rsp+128]
  8335. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8336. vpshufb xmm8, xmm0, xmm1
  8337. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  8338. vpshufb xmm9, xmm9, xmm1
  8339. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  8340. vpshufb xmm10, xmm10, xmm1
  8341. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  8342. vpshufb xmm11, xmm11, xmm1
  8343. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  8344. vpshufb xmm12, xmm12, xmm1
  8345. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  8346. vpshufb xmm13, xmm13, xmm1
  8347. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  8348. vpshufb xmm14, xmm14, xmm1
  8349. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  8350. vpshufb xmm15, xmm15, xmm1
  8351. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  8352. vmovdqa xmm7, OWORD PTR [r15]
  8353. vmovdqu OWORD PTR [rsp+128], xmm0
  8354. vpxor xmm8, xmm8, xmm7
  8355. vpxor xmm9, xmm9, xmm7
  8356. vpxor xmm10, xmm10, xmm7
  8357. vpxor xmm11, xmm11, xmm7
  8358. vpxor xmm12, xmm12, xmm7
  8359. vpxor xmm13, xmm13, xmm7
  8360. vpxor xmm14, xmm14, xmm7
  8361. vpxor xmm15, xmm15, xmm7
  8362. vmovdqu xmm7, OWORD PTR [rsp+112]
  8363. vmovdqu xmm0, OWORD PTR [rcx]
  8364. vaesenc xmm8, xmm8, [r15+16]
  8365. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8366. vpxor xmm0, xmm0, xmm2
  8367. vpshufd xmm1, xmm7, 78
  8368. vpshufd xmm5, xmm0, 78
  8369. vpxor xmm1, xmm1, xmm7
  8370. vpxor xmm5, xmm5, xmm0
  8371. vpclmulqdq xmm3, xmm0, xmm7, 17
  8372. vaesenc xmm9, xmm9, [r15+16]
  8373. vaesenc xmm10, xmm10, [r15+16]
  8374. vpclmulqdq xmm2, xmm0, xmm7, 0
  8375. vaesenc xmm11, xmm11, [r15+16]
  8376. vaesenc xmm12, xmm12, [r15+16]
  8377. vpclmulqdq xmm1, xmm1, xmm5, 0
  8378. vaesenc xmm13, xmm13, [r15+16]
  8379. vaesenc xmm14, xmm14, [r15+16]
  8380. vaesenc xmm15, xmm15, [r15+16]
  8381. vpxor xmm1, xmm1, xmm2
  8382. vpxor xmm1, xmm1, xmm3
  8383. vmovdqu xmm7, OWORD PTR [rsp+96]
  8384. vmovdqu xmm0, OWORD PTR [rcx+16]
  8385. vpshufd xmm4, xmm7, 78
  8386. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8387. vaesenc xmm8, xmm8, [r15+32]
  8388. vpxor xmm4, xmm4, xmm7
  8389. vpshufd xmm5, xmm0, 78
  8390. vpxor xmm5, xmm5, xmm0
  8391. vpclmulqdq xmm6, xmm0, xmm7, 17
  8392. vaesenc xmm9, xmm9, [r15+32]
  8393. vaesenc xmm10, xmm10, [r15+32]
  8394. vpclmulqdq xmm7, xmm0, xmm7, 0
  8395. vaesenc xmm11, xmm11, [r15+32]
  8396. vaesenc xmm12, xmm12, [r15+32]
  8397. vpclmulqdq xmm4, xmm4, xmm5, 0
  8398. vaesenc xmm13, xmm13, [r15+32]
  8399. vaesenc xmm14, xmm14, [r15+32]
  8400. vaesenc xmm15, xmm15, [r15+32]
  8401. vpxor xmm1, xmm1, xmm7
  8402. vpxor xmm2, xmm2, xmm7
  8403. vpxor xmm1, xmm1, xmm6
  8404. vpxor xmm3, xmm3, xmm6
  8405. vpxor xmm1, xmm1, xmm4
  8406. vmovdqu xmm7, OWORD PTR [rsp+80]
  8407. vmovdqu xmm0, OWORD PTR [rcx+32]
  8408. vpshufd xmm4, xmm7, 78
  8409. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8410. vaesenc xmm8, xmm8, [r15+48]
  8411. vpxor xmm4, xmm4, xmm7
  8412. vpshufd xmm5, xmm0, 78
  8413. vpxor xmm5, xmm5, xmm0
  8414. vpclmulqdq xmm6, xmm0, xmm7, 17
  8415. vaesenc xmm9, xmm9, [r15+48]
  8416. vaesenc xmm10, xmm10, [r15+48]
  8417. vpclmulqdq xmm7, xmm0, xmm7, 0
  8418. vaesenc xmm11, xmm11, [r15+48]
  8419. vaesenc xmm12, xmm12, [r15+48]
  8420. vpclmulqdq xmm4, xmm4, xmm5, 0
  8421. vaesenc xmm13, xmm13, [r15+48]
  8422. vaesenc xmm14, xmm14, [r15+48]
  8423. vaesenc xmm15, xmm15, [r15+48]
  8424. vpxor xmm1, xmm1, xmm7
  8425. vpxor xmm2, xmm2, xmm7
  8426. vpxor xmm1, xmm1, xmm6
  8427. vpxor xmm3, xmm3, xmm6
  8428. vpxor xmm1, xmm1, xmm4
  8429. vmovdqu xmm7, OWORD PTR [rsp+64]
  8430. vmovdqu xmm0, OWORD PTR [rcx+48]
  8431. vpshufd xmm4, xmm7, 78
  8432. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8433. vaesenc xmm8, xmm8, [r15+64]
  8434. vpxor xmm4, xmm4, xmm7
  8435. vpshufd xmm5, xmm0, 78
  8436. vpxor xmm5, xmm5, xmm0
  8437. vpclmulqdq xmm6, xmm0, xmm7, 17
  8438. vaesenc xmm9, xmm9, [r15+64]
  8439. vaesenc xmm10, xmm10, [r15+64]
  8440. vpclmulqdq xmm7, xmm0, xmm7, 0
  8441. vaesenc xmm11, xmm11, [r15+64]
  8442. vaesenc xmm12, xmm12, [r15+64]
  8443. vpclmulqdq xmm4, xmm4, xmm5, 0
  8444. vaesenc xmm13, xmm13, [r15+64]
  8445. vaesenc xmm14, xmm14, [r15+64]
  8446. vaesenc xmm15, xmm15, [r15+64]
  8447. vpxor xmm1, xmm1, xmm7
  8448. vpxor xmm2, xmm2, xmm7
  8449. vpxor xmm1, xmm1, xmm6
  8450. vpxor xmm3, xmm3, xmm6
  8451. vpxor xmm1, xmm1, xmm4
  8452. vmovdqu xmm7, OWORD PTR [rsp+48]
  8453. vmovdqu xmm0, OWORD PTR [rcx+64]
  8454. vpshufd xmm4, xmm7, 78
  8455. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8456. vaesenc xmm8, xmm8, [r15+80]
  8457. vpxor xmm4, xmm4, xmm7
  8458. vpshufd xmm5, xmm0, 78
  8459. vpxor xmm5, xmm5, xmm0
  8460. vpclmulqdq xmm6, xmm0, xmm7, 17
  8461. vaesenc xmm9, xmm9, [r15+80]
  8462. vaesenc xmm10, xmm10, [r15+80]
  8463. vpclmulqdq xmm7, xmm0, xmm7, 0
  8464. vaesenc xmm11, xmm11, [r15+80]
  8465. vaesenc xmm12, xmm12, [r15+80]
  8466. vpclmulqdq xmm4, xmm4, xmm5, 0
  8467. vaesenc xmm13, xmm13, [r15+80]
  8468. vaesenc xmm14, xmm14, [r15+80]
  8469. vaesenc xmm15, xmm15, [r15+80]
  8470. vpxor xmm1, xmm1, xmm7
  8471. vpxor xmm2, xmm2, xmm7
  8472. vpxor xmm1, xmm1, xmm6
  8473. vpxor xmm3, xmm3, xmm6
  8474. vpxor xmm1, xmm1, xmm4
  8475. vmovdqu xmm7, OWORD PTR [rsp+32]
  8476. vmovdqu xmm0, OWORD PTR [rcx+80]
  8477. vpshufd xmm4, xmm7, 78
  8478. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8479. vaesenc xmm8, xmm8, [r15+96]
  8480. vpxor xmm4, xmm4, xmm7
  8481. vpshufd xmm5, xmm0, 78
  8482. vpxor xmm5, xmm5, xmm0
  8483. vpclmulqdq xmm6, xmm0, xmm7, 17
  8484. vaesenc xmm9, xmm9, [r15+96]
  8485. vaesenc xmm10, xmm10, [r15+96]
  8486. vpclmulqdq xmm7, xmm0, xmm7, 0
  8487. vaesenc xmm11, xmm11, [r15+96]
  8488. vaesenc xmm12, xmm12, [r15+96]
  8489. vpclmulqdq xmm4, xmm4, xmm5, 0
  8490. vaesenc xmm13, xmm13, [r15+96]
  8491. vaesenc xmm14, xmm14, [r15+96]
  8492. vaesenc xmm15, xmm15, [r15+96]
  8493. vpxor xmm1, xmm1, xmm7
  8494. vpxor xmm2, xmm2, xmm7
  8495. vpxor xmm1, xmm1, xmm6
  8496. vpxor xmm3, xmm3, xmm6
  8497. vpxor xmm1, xmm1, xmm4
  8498. vmovdqu xmm7, OWORD PTR [rsp+16]
  8499. vmovdqu xmm0, OWORD PTR [rcx+96]
  8500. vpshufd xmm4, xmm7, 78
  8501. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8502. vaesenc xmm8, xmm8, [r15+112]
  8503. vpxor xmm4, xmm4, xmm7
  8504. vpshufd xmm5, xmm0, 78
  8505. vpxor xmm5, xmm5, xmm0
  8506. vpclmulqdq xmm6, xmm0, xmm7, 17
  8507. vaesenc xmm9, xmm9, [r15+112]
  8508. vaesenc xmm10, xmm10, [r15+112]
  8509. vpclmulqdq xmm7, xmm0, xmm7, 0
  8510. vaesenc xmm11, xmm11, [r15+112]
  8511. vaesenc xmm12, xmm12, [r15+112]
  8512. vpclmulqdq xmm4, xmm4, xmm5, 0
  8513. vaesenc xmm13, xmm13, [r15+112]
  8514. vaesenc xmm14, xmm14, [r15+112]
  8515. vaesenc xmm15, xmm15, [r15+112]
  8516. vpxor xmm1, xmm1, xmm7
  8517. vpxor xmm2, xmm2, xmm7
  8518. vpxor xmm1, xmm1, xmm6
  8519. vpxor xmm3, xmm3, xmm6
  8520. vpxor xmm1, xmm1, xmm4
  8521. vmovdqu xmm7, OWORD PTR [rsp]
  8522. vmovdqu xmm0, OWORD PTR [rcx+112]
  8523. vpshufd xmm4, xmm7, 78
  8524. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8525. vaesenc xmm8, xmm8, [r15+128]
  8526. vpxor xmm4, xmm4, xmm7
  8527. vpshufd xmm5, xmm0, 78
  8528. vpxor xmm5, xmm5, xmm0
  8529. vpclmulqdq xmm6, xmm0, xmm7, 17
  8530. vaesenc xmm9, xmm9, [r15+128]
  8531. vaesenc xmm10, xmm10, [r15+128]
  8532. vpclmulqdq xmm7, xmm0, xmm7, 0
  8533. vaesenc xmm11, xmm11, [r15+128]
  8534. vaesenc xmm12, xmm12, [r15+128]
  8535. vpclmulqdq xmm4, xmm4, xmm5, 0
  8536. vaesenc xmm13, xmm13, [r15+128]
  8537. vaesenc xmm14, xmm14, [r15+128]
  8538. vaesenc xmm15, xmm15, [r15+128]
  8539. vpxor xmm1, xmm1, xmm7
  8540. vpxor xmm2, xmm2, xmm7
  8541. vpxor xmm1, xmm1, xmm6
  8542. vpxor xmm3, xmm3, xmm6
  8543. vpxor xmm1, xmm1, xmm4
  8544. vpslldq xmm5, xmm1, 8
  8545. vpsrldq xmm1, xmm1, 8
  8546. vaesenc xmm8, xmm8, [r15+144]
  8547. vpxor xmm2, xmm2, xmm5
  8548. vpxor xmm3, xmm3, xmm1
  8549. vaesenc xmm9, xmm9, [r15+144]
  8550. vpslld xmm7, xmm2, 31
  8551. vpslld xmm4, xmm2, 30
  8552. vpslld xmm5, xmm2, 25
  8553. vaesenc xmm10, xmm10, [r15+144]
  8554. vpxor xmm7, xmm7, xmm4
  8555. vpxor xmm7, xmm7, xmm5
  8556. vaesenc xmm11, xmm11, [r15+144]
  8557. vpsrldq xmm4, xmm7, 4
  8558. vpslldq xmm7, xmm7, 12
  8559. vaesenc xmm12, xmm12, [r15+144]
  8560. vpxor xmm2, xmm2, xmm7
  8561. vpsrld xmm5, xmm2, 1
  8562. vaesenc xmm13, xmm13, [r15+144]
  8563. vpsrld xmm1, xmm2, 2
  8564. vpsrld xmm0, xmm2, 7
  8565. vaesenc xmm14, xmm14, [r15+144]
  8566. vpxor xmm5, xmm5, xmm1
  8567. vpxor xmm5, xmm5, xmm0
  8568. vaesenc xmm15, xmm15, [r15+144]
  8569. vpxor xmm5, xmm5, xmm4
  8570. vpxor xmm2, xmm2, xmm5
  8571. vpxor xmm2, xmm2, xmm3
  8572. cmp r10d, 11
  8573. vmovdqa xmm7, OWORD PTR [r15+160]
  8574. jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
  8575. vaesenc xmm8, xmm8, xmm7
  8576. vaesenc xmm9, xmm9, xmm7
  8577. vaesenc xmm10, xmm10, xmm7
  8578. vaesenc xmm11, xmm11, xmm7
  8579. vaesenc xmm12, xmm12, xmm7
  8580. vaesenc xmm13, xmm13, xmm7
  8581. vaesenc xmm14, xmm14, xmm7
  8582. vaesenc xmm15, xmm15, xmm7
  8583. vmovdqa xmm7, OWORD PTR [r15+176]
  8584. vaesenc xmm8, xmm8, xmm7
  8585. vaesenc xmm9, xmm9, xmm7
  8586. vaesenc xmm10, xmm10, xmm7
  8587. vaesenc xmm11, xmm11, xmm7
  8588. vaesenc xmm12, xmm12, xmm7
  8589. vaesenc xmm13, xmm13, xmm7
  8590. vaesenc xmm14, xmm14, xmm7
  8591. vaesenc xmm15, xmm15, xmm7
  8592. cmp r10d, 13
  8593. vmovdqa xmm7, OWORD PTR [r15+192]
  8594. jl L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done
  8595. vaesenc xmm8, xmm8, xmm7
  8596. vaesenc xmm9, xmm9, xmm7
  8597. vaesenc xmm10, xmm10, xmm7
  8598. vaesenc xmm11, xmm11, xmm7
  8599. vaesenc xmm12, xmm12, xmm7
  8600. vaesenc xmm13, xmm13, xmm7
  8601. vaesenc xmm14, xmm14, xmm7
  8602. vaesenc xmm15, xmm15, xmm7
  8603. vmovdqa xmm7, OWORD PTR [r15+208]
  8604. vaesenc xmm8, xmm8, xmm7
  8605. vaesenc xmm9, xmm9, xmm7
  8606. vaesenc xmm10, xmm10, xmm7
  8607. vaesenc xmm11, xmm11, xmm7
  8608. vaesenc xmm12, xmm12, xmm7
  8609. vaesenc xmm13, xmm13, xmm7
  8610. vaesenc xmm14, xmm14, xmm7
  8611. vaesenc xmm15, xmm15, xmm7
  8612. vmovdqa xmm7, OWORD PTR [r15+224]
  8613. L_AES_GCM_decrypt_avx1_aesenc_128_ghash_avx_done:
  8614. vaesenclast xmm8, xmm8, xmm7
  8615. vaesenclast xmm9, xmm9, xmm7
  8616. vmovdqu xmm0, OWORD PTR [rcx]
  8617. vmovdqu xmm1, OWORD PTR [rcx+16]
  8618. vpxor xmm8, xmm8, xmm0
  8619. vpxor xmm9, xmm9, xmm1
  8620. vmovdqu OWORD PTR [rdx], xmm8
  8621. vmovdqu OWORD PTR [rdx+16], xmm9
  8622. vaesenclast xmm10, xmm10, xmm7
  8623. vaesenclast xmm11, xmm11, xmm7
  8624. vmovdqu xmm0, OWORD PTR [rcx+32]
  8625. vmovdqu xmm1, OWORD PTR [rcx+48]
  8626. vpxor xmm10, xmm10, xmm0
  8627. vpxor xmm11, xmm11, xmm1
  8628. vmovdqu OWORD PTR [rdx+32], xmm10
  8629. vmovdqu OWORD PTR [rdx+48], xmm11
  8630. vaesenclast xmm12, xmm12, xmm7
  8631. vaesenclast xmm13, xmm13, xmm7
  8632. vmovdqu xmm0, OWORD PTR [rcx+64]
  8633. vmovdqu xmm1, OWORD PTR [rcx+80]
  8634. vpxor xmm12, xmm12, xmm0
  8635. vpxor xmm13, xmm13, xmm1
  8636. vmovdqu OWORD PTR [rdx+64], xmm12
  8637. vmovdqu OWORD PTR [rdx+80], xmm13
  8638. vaesenclast xmm14, xmm14, xmm7
  8639. vaesenclast xmm15, xmm15, xmm7
  8640. vmovdqu xmm0, OWORD PTR [rcx+96]
  8641. vmovdqu xmm1, OWORD PTR [rcx+112]
  8642. vpxor xmm14, xmm14, xmm0
  8643. vpxor xmm15, xmm15, xmm1
  8644. vmovdqu OWORD PTR [rdx+96], xmm14
  8645. vmovdqu OWORD PTR [rdx+112], xmm15
  8646. add ebx, 128
  8647. cmp ebx, r13d
  8648. jl L_AES_GCM_decrypt_avx1_ghash_128
  8649. vmovdqa xmm6, xmm2
  8650. vmovdqu xmm5, OWORD PTR [rsp]
  8651. L_AES_GCM_decrypt_avx1_done_128:
  8652. mov edx, r9d
  8653. cmp ebx, edx
  8654. jge L_AES_GCM_decrypt_avx1_done_dec
  8655. mov r13d, r9d
  8656. and r13d, 4294967280
  8657. cmp ebx, r13d
  8658. jge L_AES_GCM_decrypt_avx1_last_block_done
  8659. L_AES_GCM_decrypt_avx1_last_block_start:
  8660. vmovdqu xmm13, OWORD PTR [rdi+rbx]
  8661. vmovdqa xmm0, xmm5
  8662. vpshufb xmm1, xmm13, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8663. vpxor xmm1, xmm1, xmm6
  8664. vmovdqu xmm9, OWORD PTR [rsp+128]
  8665. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8666. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  8667. vmovdqu OWORD PTR [rsp+128], xmm9
  8668. vpxor xmm8, xmm8, [r15]
  8669. vpclmulqdq xmm10, xmm1, xmm0, 16
  8670. vaesenc xmm8, xmm8, [r15+16]
  8671. vaesenc xmm8, xmm8, [r15+32]
  8672. vpclmulqdq xmm11, xmm1, xmm0, 1
  8673. vaesenc xmm8, xmm8, [r15+48]
  8674. vaesenc xmm8, xmm8, [r15+64]
  8675. vpclmulqdq xmm12, xmm1, xmm0, 0
  8676. vaesenc xmm8, xmm8, [r15+80]
  8677. vpclmulqdq xmm1, xmm1, xmm0, 17
  8678. vaesenc xmm8, xmm8, [r15+96]
  8679. vpxor xmm10, xmm10, xmm11
  8680. vpslldq xmm2, xmm10, 8
  8681. vpsrldq xmm10, xmm10, 8
  8682. vaesenc xmm8, xmm8, [r15+112]
  8683. vpxor xmm2, xmm2, xmm12
  8684. vpxor xmm3, xmm1, xmm10
  8685. vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
  8686. vpclmulqdq xmm11, xmm2, xmm0, 16
  8687. vaesenc xmm8, xmm8, [r15+128]
  8688. vpshufd xmm10, xmm2, 78
  8689. vpxor xmm10, xmm10, xmm11
  8690. vpclmulqdq xmm11, xmm10, xmm0, 16
  8691. vaesenc xmm8, xmm8, [r15+144]
  8692. vpshufd xmm10, xmm10, 78
  8693. vpxor xmm10, xmm10, xmm11
  8694. vpxor xmm6, xmm10, xmm3
  8695. cmp r10d, 11
  8696. vmovdqa xmm9, OWORD PTR [r15+160]
  8697. jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
  8698. vaesenc xmm8, xmm8, xmm9
  8699. vaesenc xmm8, xmm8, [r15+176]
  8700. cmp r10d, 13
  8701. vmovdqa xmm9, OWORD PTR [r15+192]
  8702. jl L_AES_GCM_decrypt_avx1_aesenc_gfmul_last
  8703. vaesenc xmm8, xmm8, xmm9
  8704. vaesenc xmm8, xmm8, [r15+208]
  8705. vmovdqa xmm9, OWORD PTR [r15+224]
  8706. L_AES_GCM_decrypt_avx1_aesenc_gfmul_last:
  8707. vaesenclast xmm8, xmm8, xmm9
  8708. vmovdqa xmm0, xmm13
  8709. vpxor xmm8, xmm8, xmm0
  8710. vmovdqu OWORD PTR [rsi+rbx], xmm8
  8711. add ebx, 16
  8712. cmp ebx, r13d
  8713. jl L_AES_GCM_decrypt_avx1_last_block_start
  8714. L_AES_GCM_decrypt_avx1_last_block_done:
  8715. mov ecx, r9d
  8716. mov edx, ecx
  8717. and ecx, 15
  8718. jz L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done
  8719. vmovdqu xmm4, OWORD PTR [rsp+128]
  8720. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  8721. vpxor xmm4, xmm4, [r15]
  8722. vaesenc xmm4, xmm4, [r15+16]
  8723. vaesenc xmm4, xmm4, [r15+32]
  8724. vaesenc xmm4, xmm4, [r15+48]
  8725. vaesenc xmm4, xmm4, [r15+64]
  8726. vaesenc xmm4, xmm4, [r15+80]
  8727. vaesenc xmm4, xmm4, [r15+96]
  8728. vaesenc xmm4, xmm4, [r15+112]
  8729. vaesenc xmm4, xmm4, [r15+128]
  8730. vaesenc xmm4, xmm4, [r15+144]
  8731. cmp r10d, 11
  8732. vmovdqa xmm9, OWORD PTR [r15+160]
  8733. jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
  8734. vaesenc xmm4, xmm4, xmm9
  8735. vaesenc xmm4, xmm4, [r15+176]
  8736. cmp r10d, 13
  8737. vmovdqa xmm9, OWORD PTR [r15+192]
  8738. jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last
  8739. vaesenc xmm4, xmm4, xmm9
  8740. vaesenc xmm4, xmm4, [r15+208]
  8741. vmovdqa xmm9, OWORD PTR [r15+224]
  8742. L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_aesenc_avx_last:
  8743. vaesenclast xmm4, xmm4, xmm9
  8744. sub rsp, 32
  8745. xor ecx, ecx
  8746. vmovdqu OWORD PTR [rsp], xmm4
  8747. vpxor xmm0, xmm0, xmm0
  8748. vmovdqu OWORD PTR [rsp+16], xmm0
  8749. L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop:
  8750. movzx r13d, BYTE PTR [rdi+rbx]
  8751. mov BYTE PTR [rsp+rcx+16], r13b
  8752. xor r13b, BYTE PTR [rsp+rcx]
  8753. mov BYTE PTR [rsi+rbx], r13b
  8754. inc ebx
  8755. inc ecx
  8756. cmp ebx, edx
  8757. jl L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_loop
  8758. vmovdqu xmm4, OWORD PTR [rsp+16]
  8759. add rsp, 32
  8760. vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8761. vpxor xmm6, xmm6, xmm4
  8762. ; ghash_gfmul_red_avx
  8763. vpshufd xmm9, xmm5, 78
  8764. vpshufd xmm10, xmm6, 78
  8765. vpclmulqdq xmm11, xmm6, xmm5, 17
  8766. vpclmulqdq xmm8, xmm6, xmm5, 0
  8767. vpxor xmm9, xmm9, xmm5
  8768. vpxor xmm10, xmm10, xmm6
  8769. vpclmulqdq xmm9, xmm9, xmm10, 0
  8770. vpxor xmm9, xmm9, xmm8
  8771. vpxor xmm9, xmm9, xmm11
  8772. vpslldq xmm10, xmm9, 8
  8773. vpsrldq xmm9, xmm9, 8
  8774. vpxor xmm8, xmm8, xmm10
  8775. vpxor xmm6, xmm11, xmm9
  8776. vpslld xmm12, xmm8, 31
  8777. vpslld xmm13, xmm8, 30
  8778. vpslld xmm14, xmm8, 25
  8779. vpxor xmm12, xmm12, xmm13
  8780. vpxor xmm12, xmm12, xmm14
  8781. vpsrldq xmm13, xmm12, 4
  8782. vpslldq xmm12, xmm12, 12
  8783. vpxor xmm8, xmm8, xmm12
  8784. vpsrld xmm14, xmm8, 1
  8785. vpsrld xmm10, xmm8, 2
  8786. vpsrld xmm9, xmm8, 7
  8787. vpxor xmm14, xmm14, xmm10
  8788. vpxor xmm14, xmm14, xmm9
  8789. vpxor xmm14, xmm14, xmm13
  8790. vpxor xmm14, xmm14, xmm8
  8791. vpxor xmm6, xmm6, xmm14
  8792. L_AES_GCM_decrypt_avx1_aesenc_last15_dec_avx_done:
  8793. L_AES_GCM_decrypt_avx1_done_dec:
  8794. mov edx, r9d
  8795. mov ecx, r11d
  8796. shl rdx, 3
  8797. shl rcx, 3
  8798. vmovq xmm0, rdx
  8799. vmovq xmm1, rcx
  8800. vpunpcklqdq xmm0, xmm0, xmm1
  8801. vpxor xmm6, xmm6, xmm0
  8802. ; ghash_gfmul_red_avx
  8803. vpshufd xmm9, xmm5, 78
  8804. vpshufd xmm10, xmm6, 78
  8805. vpclmulqdq xmm11, xmm6, xmm5, 17
  8806. vpclmulqdq xmm8, xmm6, xmm5, 0
  8807. vpxor xmm9, xmm9, xmm5
  8808. vpxor xmm10, xmm10, xmm6
  8809. vpclmulqdq xmm9, xmm9, xmm10, 0
  8810. vpxor xmm9, xmm9, xmm8
  8811. vpxor xmm9, xmm9, xmm11
  8812. vpslldq xmm10, xmm9, 8
  8813. vpsrldq xmm9, xmm9, 8
  8814. vpxor xmm8, xmm8, xmm10
  8815. vpxor xmm6, xmm11, xmm9
  8816. vpslld xmm12, xmm8, 31
  8817. vpslld xmm13, xmm8, 30
  8818. vpslld xmm14, xmm8, 25
  8819. vpxor xmm12, xmm12, xmm13
  8820. vpxor xmm12, xmm12, xmm14
  8821. vpsrldq xmm13, xmm12, 4
  8822. vpslldq xmm12, xmm12, 12
  8823. vpxor xmm8, xmm8, xmm12
  8824. vpsrld xmm14, xmm8, 1
  8825. vpsrld xmm10, xmm8, 2
  8826. vpsrld xmm9, xmm8, 7
  8827. vpxor xmm14, xmm14, xmm10
  8828. vpxor xmm14, xmm14, xmm9
  8829. vpxor xmm14, xmm14, xmm13
  8830. vpxor xmm14, xmm14, xmm8
  8831. vpxor xmm6, xmm6, xmm14
  8832. vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
  8833. vmovdqu xmm0, OWORD PTR [rsp+144]
  8834. vpxor xmm0, xmm0, xmm6
  8835. cmp r14d, 16
  8836. je L_AES_GCM_decrypt_avx1_cmp_tag_16
  8837. sub rsp, 16
  8838. xor rcx, rcx
  8839. xor rbx, rbx
  8840. vmovdqu OWORD PTR [rsp], xmm0
  8841. L_AES_GCM_decrypt_avx1_cmp_tag_loop:
  8842. movzx r13d, BYTE PTR [rsp+rcx]
  8843. xor r13b, BYTE PTR [r8+rcx]
  8844. or bl, r13b
  8845. inc ecx
  8846. cmp ecx, r14d
  8847. jne L_AES_GCM_decrypt_avx1_cmp_tag_loop
  8848. cmp rbx, 0
  8849. sete bl
  8850. add rsp, 16
  8851. xor rcx, rcx
  8852. jmp L_AES_GCM_decrypt_avx1_cmp_tag_done
  8853. L_AES_GCM_decrypt_avx1_cmp_tag_16:
  8854. vmovdqu xmm1, OWORD PTR [r8]
  8855. vpcmpeqb xmm0, xmm0, xmm1
  8856. vpmovmskb rdx, xmm0
  8857. ; %%edx == 0xFFFF then return 1 else => return 0
  8858. xor ebx, ebx
  8859. cmp edx, 65535
  8860. sete bl
  8861. L_AES_GCM_decrypt_avx1_cmp_tag_done:
  8862. mov DWORD PTR [rbp], ebx
  8863. vzeroupper
  8864. add rsp, 168
  8865. pop rbp
  8866. pop r15
  8867. pop r14
  8868. pop rbx
  8869. pop r12
  8870. pop rsi
  8871. pop rdi
  8872. pop r13
  8873. ret
  8874. AES_GCM_decrypt_avx1 ENDP
  8875. _text ENDS
  _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_init_avx1
  ;
  ; Derives the GCM per-message values from an expanded AES key and an IV.
  ;
  ; ABI: Microsoft x64 (MASM syntax).
  ; In:
  ;   rcx        -> expanded AES round keys, read with aligned loads
  ;   edx        =  round-count selector: < 11 uses keys up to offset 160,
  ;                 < 13 up to offset 192, otherwise up to offset 224
  ;   r8         -> IV bytes
  ;   r9d        =  IV length in bytes
  ;   [rsp+40]   -> 16-byte aligned output: hash key H (byte-reversed with
  ;                 L_avx1_aes_gcm_bswap_mask)
  ;   [rsp+48]   -> 16-byte aligned output: counter block after the
  ;                 L_avx1_aes_gcm_bswap_epi64 shuffle, plus L_avx1_aes_gcm_one
  ;   [rsp+56]   -> 16-byte aligned output: encryption of the initial counter
  ; Out: the three blocks above are written; nothing is returned in rax.
  ; Clobbers: rax, rcx, rdx, r8-r11, xmm0-xmm5, flags.
  ; Preserves: rdi, rsi, r12, r13 (pushed) and xmm7, xmm8, xmm9, xmm15
  ;            (spilled to the local frame). The Microsoft x64 convention
  ;            makes xmm6-xmm15 callee-saved, so they must not be left
  ;            modified on return.
  ; ---------------------------------------------------------------------
  AES_GCM_init_avx1 PROC
      push rdi
      push rsi
      push r12
      push r13
      mov rdi, rcx                      ; rdi = round keys
      mov rsi, rdx                      ; esi = round-count selector
      mov r10, r8                       ; r10 = IV pointer
      mov r11d, r9d                     ; r11d = IV length
      ; Stack arguments: 4 pushes (32) + return address (8) + home space (32) = 72.
      mov rax, QWORD PTR [rsp+72]       ; rax = H output
      mov r8, QWORD PTR [rsp+80]        ; r8  = counter output
      mov r9, QWORD PTR [rsp+88]        ; r9  = encrypted initial counter output
      ; Frame holds only the four callee-saved XMM registers used below.
      ; rsp is not 16-byte aligned here, so unaligned moves are required.
      sub rsp, 64
      vmovdqu OWORD PTR [rsp], xmm7
      vmovdqu OWORD PTR [rsp+16], xmm8
      vmovdqu OWORD PTR [rsp+32], xmm9
      vmovdqu OWORD PTR [rsp+48], xmm15
      vpxor xmm4, xmm4, xmm4
      mov edx, r11d
      cmp edx, 12
      jne L_AES_GCM_init_avx1_iv_not_12
      ; ---- IV is exactly 12 bytes ----
      ; Counter block = IV[0..11] followed by dword 0x01000000, which is the
      ; byte sequence 00 00 00 01 in memory order.
      mov ecx, 16777216
      vmovq xmm4, QWORD PTR [r10]
      vpinsrd xmm4, xmm4, DWORD PTR [r10+8], 2
      vpinsrd xmm4, xmm4, ecx, 3
      ; Encrypt two blocks in parallel with the same round keys:
      ;   xmm5 = E(0)       (zero block XOR round key 0 is round key 0)
      ;   xmm1 = E(counter)
      vmovdqa xmm5, OWORD PTR [rdi]
      vpxor xmm1, xmm4, xmm5
      vmovdqa xmm7, OWORD PTR [rdi+16]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+32]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+48]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+64]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+80]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+96]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+112]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+128]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+144]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      ; xmm7 is pre-loaded with the candidate last-round key before each branch.
      cmp esi, 11
      vmovdqa xmm7, OWORD PTR [rdi+160]
      jl L_AES_GCM_init_avx1_calc_iv_12_last
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+176]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      cmp esi, 13
      vmovdqa xmm7, OWORD PTR [rdi+192]
      jl L_AES_GCM_init_avx1_calc_iv_12_last
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+208]
      vaesenc xmm5, xmm5, xmm7
      vaesenc xmm1, xmm1, xmm7
      vmovdqa xmm7, OWORD PTR [rdi+224]
  L_AES_GCM_init_avx1_calc_iv_12_last:
      vaesenclast xmm5, xmm5, xmm7
      vaesenclast xmm1, xmm1, xmm7
      vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
      vmovdqu xmm15, xmm1               ; xmm15 = E(counter), stored at iv_done
      jmp L_AES_GCM_init_avx1_iv_done
  L_AES_GCM_init_avx1_iv_not_12:
      ; ---- IV length is not 12 bytes ----
      ; xmm5 = E(0), computed alone this time.
      vmovdqa xmm5, OWORD PTR [rdi]
      vaesenc xmm5, xmm5, [rdi+16]
      vaesenc xmm5, xmm5, [rdi+32]
      vaesenc xmm5, xmm5, [rdi+48]
      vaesenc xmm5, xmm5, [rdi+64]
      vaesenc xmm5, xmm5, [rdi+80]
      vaesenc xmm5, xmm5, [rdi+96]
      vaesenc xmm5, xmm5, [rdi+112]
      vaesenc xmm5, xmm5, [rdi+128]
      vaesenc xmm5, xmm5, [rdi+144]
      cmp esi, 11
      vmovdqa xmm9, OWORD PTR [rdi+160]
      jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last
      vaesenc xmm5, xmm5, xmm9
      vaesenc xmm5, xmm5, [rdi+176]
      cmp esi, 13
      vmovdqa xmm9, OWORD PTR [rdi+192]
      jl L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last
      vaesenc xmm5, xmm5, xmm9
      vaesenc xmm5, xmm5, [rdi+208]
      vmovdqa xmm9, OWORD PTR [rdi+224]
  L_AES_GCM_init_avx1_calc_iv_1_aesenc_avx_last:
      vaesenclast xmm5, xmm5, xmm9
      vpshufb xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_bswap_mask
      ; Hash the IV into xmm4 (zero on entry), 16 bytes at a time.
      cmp edx, 0
      mov rcx, 0                        ; must stay a mov: flags from the cmp feed the je
      je L_AES_GCM_init_avx1_calc_iv_done
      cmp edx, 16
      jl L_AES_GCM_init_avx1_calc_iv_lt16
      and edx, 4294967280               ; edx = length rounded down to a multiple of 16
  L_AES_GCM_init_avx1_calc_iv_16_loop:
      vmovdqu xmm8, OWORD PTR [r10+rcx]
      vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
      vpxor xmm4, xmm4, xmm8
      ; ghash_gfmul_avx: xmm4 = xmm4 * xmm5
      ; Three carry-less multiplies (high halves, low halves, XOR of halves).
      vpshufd xmm1, xmm4, 78
      vpshufd xmm2, xmm5, 78
      vpclmulqdq xmm3, xmm5, xmm4, 17
      vpclmulqdq xmm0, xmm5, xmm4, 0
      vpxor xmm1, xmm1, xmm4
      vpxor xmm2, xmm2, xmm5
      vpclmulqdq xmm1, xmm1, xmm2, 0
      vpxor xmm1, xmm1, xmm0
      vpxor xmm1, xmm1, xmm3
      ; Fold the middle term into the low (xmm7) and high (xmm4) halves.
      vmovdqa xmm7, xmm0
      vmovdqa xmm4, xmm3
      vpslldq xmm2, xmm1, 8
      vpsrldq xmm1, xmm1, 8
      vpxor xmm7, xmm7, xmm2
      vpxor xmm4, xmm4, xmm1
      ; Shift the 256-bit value xmm4:xmm7 left by one bit.
      vpsrld xmm0, xmm7, 31
      vpsrld xmm1, xmm4, 31
      vpslld xmm7, xmm7, 1
      vpslld xmm4, xmm4, 1
      vpsrldq xmm2, xmm0, 12
      vpslldq xmm0, xmm0, 4
      vpslldq xmm1, xmm1, 4
      vpor xmm4, xmm4, xmm2
      vpor xmm7, xmm7, xmm0
      vpor xmm4, xmm4, xmm1
      ; Reduce back to 128 bits (shift pattern 31/30/25, then 1/2/7).
      vpslld xmm0, xmm7, 31
      vpslld xmm1, xmm7, 30
      vpslld xmm2, xmm7, 25
      vpxor xmm0, xmm0, xmm1
      vpxor xmm0, xmm0, xmm2
      vmovdqa xmm1, xmm0
      vpsrldq xmm1, xmm1, 4
      vpslldq xmm0, xmm0, 12
      vpxor xmm7, xmm7, xmm0
      vpsrld xmm2, xmm7, 1
      vpsrld xmm3, xmm7, 2
      vpsrld xmm0, xmm7, 7
      vpxor xmm2, xmm2, xmm3
      vpxor xmm2, xmm2, xmm0
      vpxor xmm2, xmm2, xmm1
      vpxor xmm2, xmm2, xmm7
      vpxor xmm4, xmm4, xmm2
      add ecx, 16
      cmp ecx, edx
      jl L_AES_GCM_init_avx1_calc_iv_16_loop
      mov edx, r11d                     ; restore the full IV length
      cmp ecx, edx
      je L_AES_GCM_init_avx1_calc_iv_done
  L_AES_GCM_init_avx1_calc_iv_lt16:
      ; Copy the remaining 1..15 IV bytes into a zeroed 16-byte stack buffer.
      sub rsp, 16
      vpxor xmm8, xmm8, xmm8
      xor r13d, r13d
      vmovdqu OWORD PTR [rsp], xmm8
  L_AES_GCM_init_avx1_calc_iv_loop:
      movzx r12d, BYTE PTR [r10+rcx]
      mov BYTE PTR [rsp+r13], r12b
      inc ecx
      inc r13d
      cmp ecx, edx
      jl L_AES_GCM_init_avx1_calc_iv_loop
      vmovdqu xmm8, OWORD PTR [rsp]
      add rsp, 16
      vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
      vpxor xmm4, xmm4, xmm8
      ; ghash_gfmul_avx: xmm4 = xmm4 * xmm5 (same sequence as in the loop above)
      vpshufd xmm1, xmm4, 78
      vpshufd xmm2, xmm5, 78
      vpclmulqdq xmm3, xmm5, xmm4, 17
      vpclmulqdq xmm0, xmm5, xmm4, 0
      vpxor xmm1, xmm1, xmm4
      vpxor xmm2, xmm2, xmm5
      vpclmulqdq xmm1, xmm1, xmm2, 0
      vpxor xmm1, xmm1, xmm0
      vpxor xmm1, xmm1, xmm3
      vmovdqa xmm7, xmm0
      vmovdqa xmm4, xmm3
      vpslldq xmm2, xmm1, 8
      vpsrldq xmm1, xmm1, 8
      vpxor xmm7, xmm7, xmm2
      vpxor xmm4, xmm4, xmm1
      vpsrld xmm0, xmm7, 31
      vpsrld xmm1, xmm4, 31
      vpslld xmm7, xmm7, 1
      vpslld xmm4, xmm4, 1
      vpsrldq xmm2, xmm0, 12
      vpslldq xmm0, xmm0, 4
      vpslldq xmm1, xmm1, 4
      vpor xmm4, xmm4, xmm2
      vpor xmm7, xmm7, xmm0
      vpor xmm4, xmm4, xmm1
      vpslld xmm0, xmm7, 31
      vpslld xmm1, xmm7, 30
      vpslld xmm2, xmm7, 25
      vpxor xmm0, xmm0, xmm1
      vpxor xmm0, xmm0, xmm2
      vmovdqa xmm1, xmm0
      vpsrldq xmm1, xmm1, 4
      vpslldq xmm0, xmm0, 12
      vpxor xmm7, xmm7, xmm0
      vpsrld xmm2, xmm7, 1
      vpsrld xmm3, xmm7, 2
      vpsrld xmm0, xmm7, 7
      vpxor xmm2, xmm2, xmm3
      vpxor xmm2, xmm2, xmm0
      vpxor xmm2, xmm2, xmm1
      vpxor xmm2, xmm2, xmm7
      vpxor xmm4, xmm4, xmm2
  L_AES_GCM_init_avx1_calc_iv_done:
      ; Mix in the IV length in bits (edx * 8) as the final hashed block.
      vpxor xmm0, xmm0, xmm0
      shl edx, 3
      vmovq xmm0, rdx
      vpxor xmm4, xmm4, xmm0
      ; ghash_gfmul_avx: xmm4 = xmm4 * xmm5
      vpshufd xmm1, xmm4, 78
      vpshufd xmm2, xmm5, 78
      vpclmulqdq xmm3, xmm5, xmm4, 17
      vpclmulqdq xmm0, xmm5, xmm4, 0
      vpxor xmm1, xmm1, xmm4
      vpxor xmm2, xmm2, xmm5
      vpclmulqdq xmm1, xmm1, xmm2, 0
      vpxor xmm1, xmm1, xmm0
      vpxor xmm1, xmm1, xmm3
      vmovdqa xmm7, xmm0
      vmovdqa xmm4, xmm3
      vpslldq xmm2, xmm1, 8
      vpsrldq xmm1, xmm1, 8
      vpxor xmm7, xmm7, xmm2
      vpxor xmm4, xmm4, xmm1
      vpsrld xmm0, xmm7, 31
      vpsrld xmm1, xmm4, 31
      vpslld xmm7, xmm7, 1
      vpslld xmm4, xmm4, 1
      vpsrldq xmm2, xmm0, 12
      vpslldq xmm0, xmm0, 4
      vpslldq xmm1, xmm1, 4
      vpor xmm4, xmm4, xmm2
      vpor xmm7, xmm7, xmm0
      vpor xmm4, xmm4, xmm1
      vpslld xmm0, xmm7, 31
      vpslld xmm1, xmm7, 30
      vpslld xmm2, xmm7, 25
      vpxor xmm0, xmm0, xmm1
      vpxor xmm0, xmm0, xmm2
      vmovdqa xmm1, xmm0
      vpsrldq xmm1, xmm1, 4
      vpslldq xmm0, xmm0, 12
      vpxor xmm7, xmm7, xmm0
      vpsrld xmm2, xmm7, 1
      vpsrld xmm3, xmm7, 2
      vpsrld xmm0, xmm7, 7
      vpxor xmm2, xmm2, xmm3
      vpxor xmm2, xmm2, xmm0
      vpxor xmm2, xmm2, xmm1
      vpxor xmm2, xmm2, xmm7
      vpxor xmm4, xmm4, xmm2
      vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
      ; xmm4 is now the counter block; xmm8 = E(counter).
      vmovdqa xmm8, OWORD PTR [rdi]
      vpxor xmm8, xmm8, xmm4
      vaesenc xmm8, xmm8, [rdi+16]
      vaesenc xmm8, xmm8, [rdi+32]
      vaesenc xmm8, xmm8, [rdi+48]
      vaesenc xmm8, xmm8, [rdi+64]
      vaesenc xmm8, xmm8, [rdi+80]
      vaesenc xmm8, xmm8, [rdi+96]
      vaesenc xmm8, xmm8, [rdi+112]
      vaesenc xmm8, xmm8, [rdi+128]
      vaesenc xmm8, xmm8, [rdi+144]
      cmp esi, 11
      vmovdqa xmm9, OWORD PTR [rdi+160]
      jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last
      vaesenc xmm8, xmm8, xmm9
      vaesenc xmm8, xmm8, [rdi+176]
      cmp esi, 13
      vmovdqa xmm9, OWORD PTR [rdi+192]
      jl L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last
      vaesenc xmm8, xmm8, xmm9
      vaesenc xmm8, xmm8, [rdi+208]
      vmovdqa xmm9, OWORD PTR [rdi+224]
  L_AES_GCM_init_avx1_calc_iv_2_aesenc_avx_last:
      vaesenclast xmm8, xmm8, xmm9
      vmovdqu xmm15, xmm8
  L_AES_GCM_init_avx1_iv_done:
      ; Common exit: xmm15 = E(counter), xmm5 = H, xmm4 = counter block.
      vmovdqa OWORD PTR [r9], xmm15
      vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_epi64
      vpaddd xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_one
      vmovdqa OWORD PTR [rax], xmm5
      vmovdqa OWORD PTR [r8], xmm4
      ; Reload the callee-saved XMM registers spilled in the prologue.
      vmovdqu xmm7, OWORD PTR [rsp]
      vmovdqu xmm8, OWORD PTR [rsp+16]
      vmovdqu xmm9, OWORD PTR [rsp+32]
      vmovdqu xmm15, OWORD PTR [rsp+48]
      vzeroupper
      add rsp, 64
      pop r13
      pop r12
      pop rsi
      pop rdi
      ret
  AES_GCM_init_avx1 ENDP
  _text ENDS
  _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_aad_update_avx1
  ;
  ; Folds a run of 16-byte blocks into a running hash value: for each block,
  ; acc = (acc XOR byte-reversed block) * H.
  ;
  ; ABI: Microsoft x64 (MASM syntax), frameless.
  ; In:
  ;   rcx -> input bytes (unaligned loads)
  ;   edx =  byte count; the loop always processes one block, then repeats
  ;          while the offset is below edx (signed compare)
  ;          NOTE(review): presumably callers pass a non-zero multiple of
  ;          16 - confirm, since a full 16 bytes is read per iteration.
  ;   r8  -> 16-byte aligned accumulator, read on entry and written on exit
  ;   r9  -> 16-byte aligned hash key H
  ; Clobbers: rax, rcx, xmm0-xmm5, flags, and the first 16 bytes of the
  ;           caller-allocated home space at [rsp+8].
  ; Preserves: xmm6. It holds H across loop iterations and is callee-saved
  ;            under the Microsoft x64 convention, so it is parked in the
  ;            home space and reloaded before returning. rsp is never moved.
  ; ---------------------------------------------------------------------
  AES_GCM_aad_update_avx1 PROC
      mov rax, rcx                      ; rax = input base, rcx becomes the offset
      vmovdqu OWORD PTR [rsp+8], xmm6   ; save callee-saved xmm6 in home space
      vmovdqa xmm5, OWORD PTR [r8]      ; xmm5 = accumulator
      vmovdqa xmm6, OWORD PTR [r9]      ; xmm6 = H
      xor ecx, ecx
  L_AES_GCM_aad_update_avx1_16_loop:
      ; xmm0 is dead at this point, so it carries the incoming block.
      vmovdqu xmm0, OWORD PTR [rax+rcx]
      vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm0
      ; ghash_gfmul_avx: xmm5 = xmm5 * xmm6
      ; Three carry-less multiplies (high halves, low halves, XOR of halves).
      vpshufd xmm1, xmm5, 78
      vpshufd xmm2, xmm6, 78
      vpclmulqdq xmm3, xmm6, xmm5, 17
      vpclmulqdq xmm0, xmm6, xmm5, 0
      vpxor xmm1, xmm1, xmm5
      vpxor xmm2, xmm2, xmm6
      vpclmulqdq xmm1, xmm1, xmm2, 0
      vpxor xmm1, xmm1, xmm0
      vpxor xmm1, xmm1, xmm3
      ; Fold the middle term into the low (xmm4) and high (xmm5) halves.
      vmovdqa xmm4, xmm0
      vmovdqa xmm5, xmm3
      vpslldq xmm2, xmm1, 8
      vpsrldq xmm1, xmm1, 8
      vpxor xmm4, xmm4, xmm2
      vpxor xmm5, xmm5, xmm1
      ; Shift the 256-bit value xmm5:xmm4 left by one bit.
      vpsrld xmm0, xmm4, 31
      vpsrld xmm1, xmm5, 31
      vpslld xmm4, xmm4, 1
      vpslld xmm5, xmm5, 1
      vpsrldq xmm2, xmm0, 12
      vpslldq xmm0, xmm0, 4
      vpslldq xmm1, xmm1, 4
      vpor xmm5, xmm5, xmm2
      vpor xmm4, xmm4, xmm0
      vpor xmm5, xmm5, xmm1
      ; Reduce back to 128 bits (shift pattern 31/30/25, then 1/2/7).
      vpslld xmm0, xmm4, 31
      vpslld xmm1, xmm4, 30
      vpslld xmm2, xmm4, 25
      vpxor xmm0, xmm0, xmm1
      vpxor xmm0, xmm0, xmm2
      vmovdqa xmm1, xmm0
      vpsrldq xmm1, xmm1, 4
      vpslldq xmm0, xmm0, 12
      vpxor xmm4, xmm4, xmm0
      vpsrld xmm2, xmm4, 1
      vpsrld xmm3, xmm4, 2
      vpsrld xmm0, xmm4, 7
      vpxor xmm2, xmm2, xmm3
      vpxor xmm2, xmm2, xmm0
      vpxor xmm2, xmm2, xmm1
      vpxor xmm2, xmm2, xmm4
      vpxor xmm5, xmm5, xmm2
      add ecx, 16
      cmp ecx, edx
      jl L_AES_GCM_aad_update_avx1_16_loop
      vmovdqa OWORD PTR [r8], xmm5      ; write the updated accumulator back
      vmovdqu xmm6, OWORD PTR [rsp+8]   ; restore callee-saved xmm6
      vzeroupper
      ret
  AES_GCM_aad_update_avx1 ENDP
  _text ENDS
  _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_encrypt_block_avx1
  ;
  ; Encrypts one 16-byte block: the stored counter is shuffled, encrypted
  ; with the round keys, and XORed with the input. The counter in memory is
  ; advanced by L_avx1_aes_gcm_one before the encryption starts.
  ;
  ; ABI: Microsoft x64 (MASM syntax), frameless leaf.
  ; In:
  ;   rcx      -> expanded AES round keys (offset 160 and later are read
  ;               with aligned loads)
  ;   edx      =  round-count selector: < 11 uses keys up to offset 160,
  ;               < 13 up to offset 192, otherwise up to offset 224
  ;   r8       -> 16-byte output block (unaligned store)
  ;   r9       -> 16-byte input block (unaligned load)
  ;   [rsp+40] -> 16-byte counter, read and written back incremented
  ; Clobbers: rax, r10, r11, xmm0, xmm1, flags.
  ;
  ; Only volatile XMM registers are used. The Microsoft x64 convention makes
  ; xmm6-xmm15 callee-saved, and this routine previously overwrote xmm8 and
  ; xmm9. The old final byte-swap of the output register was never stored or
  ; returned, so it is omitted.
  ; ---------------------------------------------------------------------
  AES_GCM_encrypt_block_avx1 PROC
      mov r10, r8                       ; r10 = output pointer
      mov r11, r9                       ; r11 = input pointer
      mov rax, QWORD PTR [rsp+40]       ; 5th argument: return address (8) + home space (32)
      ; Fetch the counter, shuffle a copy for encryption, store counter + one.
      vmovdqu xmm1, OWORD PTR [rax]
      vpshufb xmm0, xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
      vpaddd xmm1, xmm1, OWORD PTR L_avx1_aes_gcm_one
      vmovdqu OWORD PTR [rax], xmm1
      ; AES rounds on xmm0.
      vpxor xmm0, xmm0, [rcx]
      vaesenc xmm0, xmm0, [rcx+16]
      vaesenc xmm0, xmm0, [rcx+32]
      vaesenc xmm0, xmm0, [rcx+48]
      vaesenc xmm0, xmm0, [rcx+64]
      vaesenc xmm0, xmm0, [rcx+80]
      vaesenc xmm0, xmm0, [rcx+96]
      vaesenc xmm0, xmm0, [rcx+112]
      vaesenc xmm0, xmm0, [rcx+128]
      vaesenc xmm0, xmm0, [rcx+144]
      ; xmm1 is pre-loaded with the candidate last-round key before each branch.
      cmp edx, 11
      vmovdqa xmm1, OWORD PTR [rcx+160]
      jl L_AES_GCM_encrypt_block_avx1_aesenc_block_last
      vaesenc xmm0, xmm0, xmm1
      vaesenc xmm0, xmm0, [rcx+176]
      cmp edx, 13
      vmovdqa xmm1, OWORD PTR [rcx+192]
      jl L_AES_GCM_encrypt_block_avx1_aesenc_block_last
      vaesenc xmm0, xmm0, xmm1
      vaesenc xmm0, xmm0, [rcx+208]
      vmovdqa xmm1, OWORD PTR [rcx+224]
  L_AES_GCM_encrypt_block_avx1_aesenc_block_last:
      vaesenclast xmm0, xmm0, xmm1
      ; output = E(counter) XOR input
      vmovdqu xmm1, OWORD PTR [r11]
      vpxor xmm0, xmm0, xmm1
      vmovdqu OWORD PTR [r10], xmm0
      vzeroupper
      ret
  AES_GCM_encrypt_block_avx1 ENDP
  _text ENDS
  _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_ghash_block_avx1
  ;
  ; Folds one 16-byte block into a running hash value:
  ;   acc = (acc XOR byte-reversed block) * H.
  ;
  ; ABI: Microsoft x64 (MASM syntax), frameless leaf.
  ; In:
  ;   rcx -> 16-byte data block (unaligned load)
  ;   rdx -> 16-byte aligned accumulator, read on entry and written on exit
  ;   r8  -> 16-byte aligned hash key H
  ; Clobbers: xmm0-xmm5.
  ;
  ; Only volatile XMM registers are used. The Microsoft x64 convention makes
  ; xmm6-xmm15 callee-saved, and this routine previously overwrote xmm6 and
  ; xmm8. The data block now travels through xmm0, and the low product half
  ; reuses xmm5 once H has been read for the last time.
  ; ---------------------------------------------------------------------
  AES_GCM_ghash_block_avx1 PROC
      vmovdqa xmm4, OWORD PTR [rdx]     ; xmm4 = accumulator
      vmovdqa xmm5, OWORD PTR [r8]      ; xmm5 = H
      vmovdqu xmm0, OWORD PTR [rcx]     ; xmm0 = data block
      vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
      vpxor xmm4, xmm4, xmm0
      ; ghash_gfmul_avx: xmm4 = xmm4 * xmm5
      ; Three carry-less multiplies (high halves, low halves, XOR of halves).
      vpshufd xmm1, xmm4, 78
      vpshufd xmm2, xmm5, 78
      vpclmulqdq xmm3, xmm5, xmm4, 17
      vpclmulqdq xmm0, xmm5, xmm4, 0
      vpxor xmm1, xmm1, xmm4
      vpxor xmm2, xmm2, xmm5            ; last read of H; xmm5 is free afterwards
      vpclmulqdq xmm1, xmm1, xmm2, 0
      vpxor xmm1, xmm1, xmm0
      vpxor xmm1, xmm1, xmm3
      ; Fold the middle term into the low (xmm5) and high (xmm4) halves.
      vmovdqa xmm5, xmm0
      vmovdqa xmm4, xmm3
      vpslldq xmm2, xmm1, 8
      vpsrldq xmm1, xmm1, 8
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm1
      ; Shift the 256-bit value xmm4:xmm5 left by one bit.
      vpsrld xmm0, xmm5, 31
      vpsrld xmm1, xmm4, 31
      vpslld xmm5, xmm5, 1
      vpslld xmm4, xmm4, 1
      vpsrldq xmm2, xmm0, 12
      vpslldq xmm0, xmm0, 4
      vpslldq xmm1, xmm1, 4
      vpor xmm4, xmm4, xmm2
      vpor xmm5, xmm5, xmm0
      vpor xmm4, xmm4, xmm1
      ; Reduce back to 128 bits (shift pattern 31/30/25, then 1/2/7).
      vpslld xmm0, xmm5, 31
      vpslld xmm1, xmm5, 30
      vpslld xmm2, xmm5, 25
      vpxor xmm0, xmm0, xmm1
      vpxor xmm0, xmm0, xmm2
      vmovdqa xmm1, xmm0
      vpsrldq xmm1, xmm1, 4
      vpslldq xmm0, xmm0, 12
      vpxor xmm5, xmm5, xmm0
      vpsrld xmm2, xmm5, 1
      vpsrld xmm3, xmm5, 2
      vpsrld xmm0, xmm5, 7
      vpxor xmm2, xmm2, xmm3
      vpxor xmm2, xmm2, xmm0
      vpxor xmm2, xmm2, xmm1
      vpxor xmm2, xmm2, xmm5
      vpxor xmm4, xmm4, xmm2
      vmovdqa OWORD PTR [rdx], xmm4     ; write the updated accumulator back
      vzeroupper
      ret
  AES_GCM_ghash_block_avx1 ENDP
  _text ENDS
  9347. _text SEGMENT READONLY PARA
  9348. AES_GCM_encrypt_update_avx1 PROC
  9349. push r13
  9350. push r12
  9351. push r14
  9352. push r15
  9353. push rdi
  9354. mov rax, rcx
  9355. mov r10, r8
  9356. mov r8d, edx
  9357. mov r11, r9
  9358. mov r9d, DWORD PTR [rsp+80]
  9359. mov r12, QWORD PTR [rsp+88]
  9360. mov r14, QWORD PTR [rsp+96]
  9361. mov r15, QWORD PTR [rsp+104]
  9362. sub rsp, 160
  9363. vmovdqa xmm6, OWORD PTR [r12]
  9364. vmovdqa xmm5, OWORD PTR [r14]
  9365. vpsrlq xmm9, xmm5, 63
  9366. vpsllq xmm8, xmm5, 1
  9367. vpslldq xmm9, xmm9, 8
  9368. vpor xmm8, xmm8, xmm9
  9369. vpshufd xmm5, xmm5, 255
  9370. vpsrad xmm5, xmm5, 31
  9371. vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
  9372. vpxor xmm5, xmm5, xmm8
  9373. xor edi, edi
  9374. cmp r9d, 128
  9375. mov r13d, r9d
  9376. jl L_AES_GCM_encrypt_update_avx1_done_128
  9377. and r13d, 4294967168
  9378. vmovdqa xmm2, xmm6
  9379. ; H ^ 1
  9380. vmovdqu OWORD PTR [rsp], xmm5
  9381. ; H ^ 2
  9382. vpclmulqdq xmm8, xmm5, xmm5, 0
  9383. vpclmulqdq xmm0, xmm5, xmm5, 17
  9384. vpslld xmm12, xmm8, 31
  9385. vpslld xmm13, xmm8, 30
  9386. vpslld xmm14, xmm8, 25
  9387. vpxor xmm12, xmm12, xmm13
  9388. vpxor xmm12, xmm12, xmm14
  9389. vpsrldq xmm13, xmm12, 4
  9390. vpslldq xmm12, xmm12, 12
  9391. vpxor xmm8, xmm8, xmm12
  9392. vpsrld xmm14, xmm8, 1
  9393. vpsrld xmm10, xmm8, 2
  9394. vpsrld xmm9, xmm8, 7
  9395. vpxor xmm14, xmm14, xmm10
  9396. vpxor xmm14, xmm14, xmm9
  9397. vpxor xmm14, xmm14, xmm13
  9398. vpxor xmm14, xmm14, xmm8
  9399. vpxor xmm0, xmm0, xmm14
  9400. vmovdqu OWORD PTR [rsp+16], xmm0
  9401. ; H ^ 3
  9402. ; ghash_gfmul_red_avx
  9403. vpshufd xmm9, xmm5, 78
  9404. vpshufd xmm10, xmm0, 78
  9405. vpclmulqdq xmm11, xmm0, xmm5, 17
  9406. vpclmulqdq xmm8, xmm0, xmm5, 0
  9407. vpxor xmm9, xmm9, xmm5
  9408. vpxor xmm10, xmm10, xmm0
  9409. vpclmulqdq xmm9, xmm9, xmm10, 0
  9410. vpxor xmm9, xmm9, xmm8
  9411. vpxor xmm9, xmm9, xmm11
  9412. vpslldq xmm10, xmm9, 8
  9413. vpsrldq xmm9, xmm9, 8
  9414. vpxor xmm8, xmm8, xmm10
  9415. vpxor xmm1, xmm11, xmm9
  9416. vpslld xmm12, xmm8, 31
  9417. vpslld xmm13, xmm8, 30
  9418. vpslld xmm14, xmm8, 25
  9419. vpxor xmm12, xmm12, xmm13
  9420. vpxor xmm12, xmm12, xmm14
  9421. vpsrldq xmm13, xmm12, 4
  9422. vpslldq xmm12, xmm12, 12
  9423. vpxor xmm8, xmm8, xmm12
  9424. vpsrld xmm14, xmm8, 1
  9425. vpsrld xmm10, xmm8, 2
  9426. vpsrld xmm9, xmm8, 7
  9427. vpxor xmm14, xmm14, xmm10
  9428. vpxor xmm14, xmm14, xmm9
  9429. vpxor xmm14, xmm14, xmm13
  9430. vpxor xmm14, xmm14, xmm8
  9431. vpxor xmm1, xmm1, xmm14
  9432. vmovdqu OWORD PTR [rsp+32], xmm1
  9433. ; H ^ 4
  9434. vpclmulqdq xmm8, xmm0, xmm0, 0
  9435. vpclmulqdq xmm3, xmm0, xmm0, 17
  9436. vpslld xmm12, xmm8, 31
  9437. vpslld xmm13, xmm8, 30
  9438. vpslld xmm14, xmm8, 25
  9439. vpxor xmm12, xmm12, xmm13
  9440. vpxor xmm12, xmm12, xmm14
  9441. vpsrldq xmm13, xmm12, 4
  9442. vpslldq xmm12, xmm12, 12
  9443. vpxor xmm8, xmm8, xmm12
  9444. vpsrld xmm14, xmm8, 1
  9445. vpsrld xmm10, xmm8, 2
  9446. vpsrld xmm9, xmm8, 7
  9447. vpxor xmm14, xmm14, xmm10
  9448. vpxor xmm14, xmm14, xmm9
  9449. vpxor xmm14, xmm14, xmm13
  9450. vpxor xmm14, xmm14, xmm8
  9451. vpxor xmm3, xmm3, xmm14
  9452. vmovdqu OWORD PTR [rsp+48], xmm3
  9453. ; H ^ 5
  9454. ; ghash_gfmul_red_avx
  9455. vpshufd xmm9, xmm0, 78
  9456. vpshufd xmm10, xmm1, 78
  9457. vpclmulqdq xmm11, xmm1, xmm0, 17
  9458. vpclmulqdq xmm8, xmm1, xmm0, 0
  9459. vpxor xmm9, xmm9, xmm0
  9460. vpxor xmm10, xmm10, xmm1
  9461. vpclmulqdq xmm9, xmm9, xmm10, 0
  9462. vpxor xmm9, xmm9, xmm8
  9463. vpxor xmm9, xmm9, xmm11
  9464. vpslldq xmm10, xmm9, 8
  9465. vpsrldq xmm9, xmm9, 8
  9466. vpxor xmm8, xmm8, xmm10
  9467. vpxor xmm7, xmm11, xmm9
  9468. vpslld xmm12, xmm8, 31
  9469. vpslld xmm13, xmm8, 30
  9470. vpslld xmm14, xmm8, 25
  9471. vpxor xmm12, xmm12, xmm13
  9472. vpxor xmm12, xmm12, xmm14
  9473. vpsrldq xmm13, xmm12, 4
  9474. vpslldq xmm12, xmm12, 12
  9475. vpxor xmm8, xmm8, xmm12
  9476. vpsrld xmm14, xmm8, 1
  9477. vpsrld xmm10, xmm8, 2
  9478. vpsrld xmm9, xmm8, 7
  9479. vpxor xmm14, xmm14, xmm10
  9480. vpxor xmm14, xmm14, xmm9
  9481. vpxor xmm14, xmm14, xmm13
  9482. vpxor xmm14, xmm14, xmm8
  9483. vpxor xmm7, xmm7, xmm14
  9484. vmovdqu OWORD PTR [rsp+64], xmm7
  9485. ; H ^ 6
  9486. vpclmulqdq xmm8, xmm1, xmm1, 0
  9487. vpclmulqdq xmm7, xmm1, xmm1, 17
  9488. vpslld xmm12, xmm8, 31
  9489. vpslld xmm13, xmm8, 30
  9490. vpslld xmm14, xmm8, 25
  9491. vpxor xmm12, xmm12, xmm13
  9492. vpxor xmm12, xmm12, xmm14
  9493. vpsrldq xmm13, xmm12, 4
  9494. vpslldq xmm12, xmm12, 12
  9495. vpxor xmm8, xmm8, xmm12
  9496. vpsrld xmm14, xmm8, 1
  9497. vpsrld xmm10, xmm8, 2
  9498. vpsrld xmm9, xmm8, 7
  9499. vpxor xmm14, xmm14, xmm10
  9500. vpxor xmm14, xmm14, xmm9
  9501. vpxor xmm14, xmm14, xmm13
  9502. vpxor xmm14, xmm14, xmm8
  9503. vpxor xmm7, xmm7, xmm14
  9504. vmovdqu OWORD PTR [rsp+80], xmm7
  9505. ; H ^ 7
  9506. ; ghash_gfmul_red_avx
  9507. vpshufd xmm9, xmm1, 78
  9508. vpshufd xmm10, xmm3, 78
  9509. vpclmulqdq xmm11, xmm3, xmm1, 17
  9510. vpclmulqdq xmm8, xmm3, xmm1, 0
  9511. vpxor xmm9, xmm9, xmm1
  9512. vpxor xmm10, xmm10, xmm3
  9513. vpclmulqdq xmm9, xmm9, xmm10, 0
  9514. vpxor xmm9, xmm9, xmm8
  9515. vpxor xmm9, xmm9, xmm11
  9516. vpslldq xmm10, xmm9, 8
  9517. vpsrldq xmm9, xmm9, 8
  9518. vpxor xmm8, xmm8, xmm10
  9519. vpxor xmm7, xmm11, xmm9
  9520. vpslld xmm12, xmm8, 31
  9521. vpslld xmm13, xmm8, 30
  9522. vpslld xmm14, xmm8, 25
  9523. vpxor xmm12, xmm12, xmm13
  9524. vpxor xmm12, xmm12, xmm14
  9525. vpsrldq xmm13, xmm12, 4
  9526. vpslldq xmm12, xmm12, 12
  9527. vpxor xmm8, xmm8, xmm12
  9528. vpsrld xmm14, xmm8, 1
  9529. vpsrld xmm10, xmm8, 2
  9530. vpsrld xmm9, xmm8, 7
  9531. vpxor xmm14, xmm14, xmm10
  9532. vpxor xmm14, xmm14, xmm9
  9533. vpxor xmm14, xmm14, xmm13
  9534. vpxor xmm14, xmm14, xmm8
  9535. vpxor xmm7, xmm7, xmm14
  9536. vmovdqu OWORD PTR [rsp+96], xmm7
  9537. ; H ^ 8
  9538. vpclmulqdq xmm8, xmm3, xmm3, 0
  9539. vpclmulqdq xmm7, xmm3, xmm3, 17
  9540. vpslld xmm12, xmm8, 31
  9541. vpslld xmm13, xmm8, 30
  9542. vpslld xmm14, xmm8, 25
  9543. vpxor xmm12, xmm12, xmm13
  9544. vpxor xmm12, xmm12, xmm14
  9545. vpsrldq xmm13, xmm12, 4
  9546. vpslldq xmm12, xmm12, 12
  9547. vpxor xmm8, xmm8, xmm12
  9548. vpsrld xmm14, xmm8, 1
  9549. vpsrld xmm10, xmm8, 2
  9550. vpsrld xmm9, xmm8, 7
  9551. vpxor xmm14, xmm14, xmm10
  9552. vpxor xmm14, xmm14, xmm9
  9553. vpxor xmm14, xmm14, xmm13
  9554. vpxor xmm14, xmm14, xmm8
  9555. vpxor xmm7, xmm7, xmm14
  9556. vmovdqu OWORD PTR [rsp+112], xmm7
  9557. ; First 128 bytes of input
  9558. vmovdqu xmm0, OWORD PTR [r15]
  9559. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  9560. vpshufb xmm8, xmm0, xmm1
  9561. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  9562. vpshufb xmm9, xmm9, xmm1
  9563. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  9564. vpshufb xmm10, xmm10, xmm1
  9565. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  9566. vpshufb xmm11, xmm11, xmm1
  9567. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  9568. vpshufb xmm12, xmm12, xmm1
  9569. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  9570. vpshufb xmm13, xmm13, xmm1
  9571. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  9572. vpshufb xmm14, xmm14, xmm1
  9573. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  9574. vpshufb xmm15, xmm15, xmm1
  9575. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  9576. vmovdqa xmm7, OWORD PTR [rax]
  9577. vmovdqu OWORD PTR [r15], xmm0
  9578. vpxor xmm8, xmm8, xmm7
  9579. vpxor xmm9, xmm9, xmm7
  9580. vpxor xmm10, xmm10, xmm7
  9581. vpxor xmm11, xmm11, xmm7
  9582. vpxor xmm12, xmm12, xmm7
  9583. vpxor xmm13, xmm13, xmm7
  9584. vpxor xmm14, xmm14, xmm7
  9585. vpxor xmm15, xmm15, xmm7
  9586. vmovdqa xmm7, OWORD PTR [rax+16]
  9587. vaesenc xmm8, xmm8, xmm7
  9588. vaesenc xmm9, xmm9, xmm7
  9589. vaesenc xmm10, xmm10, xmm7
  9590. vaesenc xmm11, xmm11, xmm7
  9591. vaesenc xmm12, xmm12, xmm7
  9592. vaesenc xmm13, xmm13, xmm7
  9593. vaesenc xmm14, xmm14, xmm7
  9594. vaesenc xmm15, xmm15, xmm7
  9595. vmovdqa xmm7, OWORD PTR [rax+32]
  9596. vaesenc xmm8, xmm8, xmm7
  9597. vaesenc xmm9, xmm9, xmm7
  9598. vaesenc xmm10, xmm10, xmm7
  9599. vaesenc xmm11, xmm11, xmm7
  9600. vaesenc xmm12, xmm12, xmm7
  9601. vaesenc xmm13, xmm13, xmm7
  9602. vaesenc xmm14, xmm14, xmm7
  9603. vaesenc xmm15, xmm15, xmm7
  9604. vmovdqa xmm7, OWORD PTR [rax+48]
  9605. vaesenc xmm8, xmm8, xmm7
  9606. vaesenc xmm9, xmm9, xmm7
  9607. vaesenc xmm10, xmm10, xmm7
  9608. vaesenc xmm11, xmm11, xmm7
  9609. vaesenc xmm12, xmm12, xmm7
  9610. vaesenc xmm13, xmm13, xmm7
  9611. vaesenc xmm14, xmm14, xmm7
  9612. vaesenc xmm15, xmm15, xmm7
  9613. vmovdqa xmm7, OWORD PTR [rax+64]
  9614. vaesenc xmm8, xmm8, xmm7
  9615. vaesenc xmm9, xmm9, xmm7
  9616. vaesenc xmm10, xmm10, xmm7
  9617. vaesenc xmm11, xmm11, xmm7
  9618. vaesenc xmm12, xmm12, xmm7
  9619. vaesenc xmm13, xmm13, xmm7
  9620. vaesenc xmm14, xmm14, xmm7
  9621. vaesenc xmm15, xmm15, xmm7
  9622. vmovdqa xmm7, OWORD PTR [rax+80]
  9623. vaesenc xmm8, xmm8, xmm7
  9624. vaesenc xmm9, xmm9, xmm7
  9625. vaesenc xmm10, xmm10, xmm7
  9626. vaesenc xmm11, xmm11, xmm7
  9627. vaesenc xmm12, xmm12, xmm7
  9628. vaesenc xmm13, xmm13, xmm7
  9629. vaesenc xmm14, xmm14, xmm7
  9630. vaesenc xmm15, xmm15, xmm7
  9631. vmovdqa xmm7, OWORD PTR [rax+96]
  9632. vaesenc xmm8, xmm8, xmm7
  9633. vaesenc xmm9, xmm9, xmm7
  9634. vaesenc xmm10, xmm10, xmm7
  9635. vaesenc xmm11, xmm11, xmm7
  9636. vaesenc xmm12, xmm12, xmm7
  9637. vaesenc xmm13, xmm13, xmm7
  9638. vaesenc xmm14, xmm14, xmm7
  9639. vaesenc xmm15, xmm15, xmm7
  9640. vmovdqa xmm7, OWORD PTR [rax+112]
  9641. vaesenc xmm8, xmm8, xmm7
  9642. vaesenc xmm9, xmm9, xmm7
  9643. vaesenc xmm10, xmm10, xmm7
  9644. vaesenc xmm11, xmm11, xmm7
  9645. vaesenc xmm12, xmm12, xmm7
  9646. vaesenc xmm13, xmm13, xmm7
  9647. vaesenc xmm14, xmm14, xmm7
  9648. vaesenc xmm15, xmm15, xmm7
  9649. vmovdqa xmm7, OWORD PTR [rax+128]
  9650. vaesenc xmm8, xmm8, xmm7
  9651. vaesenc xmm9, xmm9, xmm7
  9652. vaesenc xmm10, xmm10, xmm7
  9653. vaesenc xmm11, xmm11, xmm7
  9654. vaesenc xmm12, xmm12, xmm7
  9655. vaesenc xmm13, xmm13, xmm7
  9656. vaesenc xmm14, xmm14, xmm7
  9657. vaesenc xmm15, xmm15, xmm7
  9658. vmovdqa xmm7, OWORD PTR [rax+144]
  9659. vaesenc xmm8, xmm8, xmm7
  9660. vaesenc xmm9, xmm9, xmm7
  9661. vaesenc xmm10, xmm10, xmm7
  9662. vaesenc xmm11, xmm11, xmm7
  9663. vaesenc xmm12, xmm12, xmm7
  9664. vaesenc xmm13, xmm13, xmm7
  9665. vaesenc xmm14, xmm14, xmm7
  9666. vaesenc xmm15, xmm15, xmm7
  9667. cmp r8d, 11
  9668. vmovdqa xmm7, OWORD PTR [rax+160]
  9669. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done
  9670. vaesenc xmm8, xmm8, xmm7
  9671. vaesenc xmm9, xmm9, xmm7
  9672. vaesenc xmm10, xmm10, xmm7
  9673. vaesenc xmm11, xmm11, xmm7
  9674. vaesenc xmm12, xmm12, xmm7
  9675. vaesenc xmm13, xmm13, xmm7
  9676. vaesenc xmm14, xmm14, xmm7
  9677. vaesenc xmm15, xmm15, xmm7
  9678. vmovdqa xmm7, OWORD PTR [rax+176]
  9679. vaesenc xmm8, xmm8, xmm7
  9680. vaesenc xmm9, xmm9, xmm7
  9681. vaesenc xmm10, xmm10, xmm7
  9682. vaesenc xmm11, xmm11, xmm7
  9683. vaesenc xmm12, xmm12, xmm7
  9684. vaesenc xmm13, xmm13, xmm7
  9685. vaesenc xmm14, xmm14, xmm7
  9686. vaesenc xmm15, xmm15, xmm7
  9687. cmp r8d, 13
  9688. vmovdqa xmm7, OWORD PTR [rax+192]
  9689. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done
  9690. vaesenc xmm8, xmm8, xmm7
  9691. vaesenc xmm9, xmm9, xmm7
  9692. vaesenc xmm10, xmm10, xmm7
  9693. vaesenc xmm11, xmm11, xmm7
  9694. vaesenc xmm12, xmm12, xmm7
  9695. vaesenc xmm13, xmm13, xmm7
  9696. vaesenc xmm14, xmm14, xmm7
  9697. vaesenc xmm15, xmm15, xmm7
  9698. vmovdqa xmm7, OWORD PTR [rax+208]
  9699. vaesenc xmm8, xmm8, xmm7
  9700. vaesenc xmm9, xmm9, xmm7
  9701. vaesenc xmm10, xmm10, xmm7
  9702. vaesenc xmm11, xmm11, xmm7
  9703. vaesenc xmm12, xmm12, xmm7
  9704. vaesenc xmm13, xmm13, xmm7
  9705. vaesenc xmm14, xmm14, xmm7
  9706. vaesenc xmm15, xmm15, xmm7
  9707. vmovdqa xmm7, OWORD PTR [rax+224]
  9708. L_AES_GCM_encrypt_update_avx1_aesenc_128_enc_done:
  9709. vaesenclast xmm8, xmm8, xmm7
  9710. vaesenclast xmm9, xmm9, xmm7
  9711. vmovdqu xmm0, OWORD PTR [r11]
  9712. vmovdqu xmm1, OWORD PTR [r11+16]
  9713. vpxor xmm8, xmm8, xmm0
  9714. vpxor xmm9, xmm9, xmm1
  9715. vmovdqu OWORD PTR [r10], xmm8
  9716. vmovdqu OWORD PTR [r10+16], xmm9
  9717. vaesenclast xmm10, xmm10, xmm7
  9718. vaesenclast xmm11, xmm11, xmm7
  9719. vmovdqu xmm0, OWORD PTR [r11+32]
  9720. vmovdqu xmm1, OWORD PTR [r11+48]
  9721. vpxor xmm10, xmm10, xmm0
  9722. vpxor xmm11, xmm11, xmm1
  9723. vmovdqu OWORD PTR [r10+32], xmm10
  9724. vmovdqu OWORD PTR [r10+48], xmm11
  9725. vaesenclast xmm12, xmm12, xmm7
  9726. vaesenclast xmm13, xmm13, xmm7
  9727. vmovdqu xmm0, OWORD PTR [r11+64]
  9728. vmovdqu xmm1, OWORD PTR [r11+80]
  9729. vpxor xmm12, xmm12, xmm0
  9730. vpxor xmm13, xmm13, xmm1
  9731. vmovdqu OWORD PTR [r10+64], xmm12
  9732. vmovdqu OWORD PTR [r10+80], xmm13
  9733. vaesenclast xmm14, xmm14, xmm7
  9734. vaesenclast xmm15, xmm15, xmm7
  9735. vmovdqu xmm0, OWORD PTR [r11+96]
  9736. vmovdqu xmm1, OWORD PTR [r11+112]
  9737. vpxor xmm14, xmm14, xmm0
  9738. vpxor xmm15, xmm15, xmm1
  9739. vmovdqu OWORD PTR [r10+96], xmm14
  9740. vmovdqu OWORD PTR [r10+112], xmm15
  9741. cmp r13d, 128
  9742. mov edi, 128
  9743. jle L_AES_GCM_encrypt_update_avx1_end_128
  9744. ; More 128 bytes of input
  9745. L_AES_GCM_encrypt_update_avx1_ghash_128:
  9746. lea rcx, QWORD PTR [r11+rdi]
  9747. lea rdx, QWORD PTR [r10+rdi]
  9748. vmovdqu xmm0, OWORD PTR [r15]
  9749. vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  9750. vpshufb xmm8, xmm0, xmm1
  9751. vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
  9752. vpshufb xmm9, xmm9, xmm1
  9753. vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
  9754. vpshufb xmm10, xmm10, xmm1
  9755. vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
  9756. vpshufb xmm11, xmm11, xmm1
  9757. vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
  9758. vpshufb xmm12, xmm12, xmm1
  9759. vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
  9760. vpshufb xmm13, xmm13, xmm1
  9761. vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
  9762. vpshufb xmm14, xmm14, xmm1
  9763. vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
  9764. vpshufb xmm15, xmm15, xmm1
  9765. vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
  9766. vmovdqa xmm7, OWORD PTR [rax]
  9767. vmovdqu OWORD PTR [r15], xmm0
  9768. vpxor xmm8, xmm8, xmm7
  9769. vpxor xmm9, xmm9, xmm7
  9770. vpxor xmm10, xmm10, xmm7
  9771. vpxor xmm11, xmm11, xmm7
  9772. vpxor xmm12, xmm12, xmm7
  9773. vpxor xmm13, xmm13, xmm7
  9774. vpxor xmm14, xmm14, xmm7
  9775. vpxor xmm15, xmm15, xmm7
  9776. vmovdqu xmm7, OWORD PTR [rsp+112]
  9777. vmovdqu xmm0, OWORD PTR [rdx+-128]
  9778. vaesenc xmm8, xmm8, [rax+16]
  9779. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9780. vpxor xmm0, xmm0, xmm2
  9781. vpshufd xmm1, xmm7, 78
  9782. vpshufd xmm5, xmm0, 78
  9783. vpxor xmm1, xmm1, xmm7
  9784. vpxor xmm5, xmm5, xmm0
  9785. vpclmulqdq xmm3, xmm0, xmm7, 17
  9786. vaesenc xmm9, xmm9, [rax+16]
  9787. vaesenc xmm10, xmm10, [rax+16]
  9788. vpclmulqdq xmm2, xmm0, xmm7, 0
  9789. vaesenc xmm11, xmm11, [rax+16]
  9790. vaesenc xmm12, xmm12, [rax+16]
  9791. vpclmulqdq xmm1, xmm1, xmm5, 0
  9792. vaesenc xmm13, xmm13, [rax+16]
  9793. vaesenc xmm14, xmm14, [rax+16]
  9794. vaesenc xmm15, xmm15, [rax+16]
  9795. vpxor xmm1, xmm1, xmm2
  9796. vpxor xmm1, xmm1, xmm3
  9797. vmovdqu xmm7, OWORD PTR [rsp+96]
  9798. vmovdqu xmm0, OWORD PTR [rdx+-112]
  9799. vpshufd xmm4, xmm7, 78
  9800. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9801. vaesenc xmm8, xmm8, [rax+32]
  9802. vpxor xmm4, xmm4, xmm7
  9803. vpshufd xmm5, xmm0, 78
  9804. vpxor xmm5, xmm5, xmm0
  9805. vpclmulqdq xmm6, xmm0, xmm7, 17
  9806. vaesenc xmm9, xmm9, [rax+32]
  9807. vaesenc xmm10, xmm10, [rax+32]
  9808. vpclmulqdq xmm7, xmm0, xmm7, 0
  9809. vaesenc xmm11, xmm11, [rax+32]
  9810. vaesenc xmm12, xmm12, [rax+32]
  9811. vpclmulqdq xmm4, xmm4, xmm5, 0
  9812. vaesenc xmm13, xmm13, [rax+32]
  9813. vaesenc xmm14, xmm14, [rax+32]
  9814. vaesenc xmm15, xmm15, [rax+32]
  9815. vpxor xmm1, xmm1, xmm7
  9816. vpxor xmm2, xmm2, xmm7
  9817. vpxor xmm1, xmm1, xmm6
  9818. vpxor xmm3, xmm3, xmm6
  9819. vpxor xmm1, xmm1, xmm4
  9820. vmovdqu xmm7, OWORD PTR [rsp+80]
  9821. vmovdqu xmm0, OWORD PTR [rdx+-96]
  9822. vpshufd xmm4, xmm7, 78
  9823. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9824. vaesenc xmm8, xmm8, [rax+48]
  9825. vpxor xmm4, xmm4, xmm7
  9826. vpshufd xmm5, xmm0, 78
  9827. vpxor xmm5, xmm5, xmm0
  9828. vpclmulqdq xmm6, xmm0, xmm7, 17
  9829. vaesenc xmm9, xmm9, [rax+48]
  9830. vaesenc xmm10, xmm10, [rax+48]
  9831. vpclmulqdq xmm7, xmm0, xmm7, 0
  9832. vaesenc xmm11, xmm11, [rax+48]
  9833. vaesenc xmm12, xmm12, [rax+48]
  9834. vpclmulqdq xmm4, xmm4, xmm5, 0
  9835. vaesenc xmm13, xmm13, [rax+48]
  9836. vaesenc xmm14, xmm14, [rax+48]
  9837. vaesenc xmm15, xmm15, [rax+48]
  9838. vpxor xmm1, xmm1, xmm7
  9839. vpxor xmm2, xmm2, xmm7
  9840. vpxor xmm1, xmm1, xmm6
  9841. vpxor xmm3, xmm3, xmm6
  9842. vpxor xmm1, xmm1, xmm4
  9843. vmovdqu xmm7, OWORD PTR [rsp+64]
  9844. vmovdqu xmm0, OWORD PTR [rdx+-80]
  9845. vpshufd xmm4, xmm7, 78
  9846. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9847. vaesenc xmm8, xmm8, [rax+64]
  9848. vpxor xmm4, xmm4, xmm7
  9849. vpshufd xmm5, xmm0, 78
  9850. vpxor xmm5, xmm5, xmm0
  9851. vpclmulqdq xmm6, xmm0, xmm7, 17
  9852. vaesenc xmm9, xmm9, [rax+64]
  9853. vaesenc xmm10, xmm10, [rax+64]
  9854. vpclmulqdq xmm7, xmm0, xmm7, 0
  9855. vaesenc xmm11, xmm11, [rax+64]
  9856. vaesenc xmm12, xmm12, [rax+64]
  9857. vpclmulqdq xmm4, xmm4, xmm5, 0
  9858. vaesenc xmm13, xmm13, [rax+64]
  9859. vaesenc xmm14, xmm14, [rax+64]
  9860. vaesenc xmm15, xmm15, [rax+64]
  9861. vpxor xmm1, xmm1, xmm7
  9862. vpxor xmm2, xmm2, xmm7
  9863. vpxor xmm1, xmm1, xmm6
  9864. vpxor xmm3, xmm3, xmm6
  9865. vpxor xmm1, xmm1, xmm4
  9866. vmovdqu xmm7, OWORD PTR [rsp+48]
  9867. vmovdqu xmm0, OWORD PTR [rdx+-64]
  9868. vpshufd xmm4, xmm7, 78
  9869. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9870. vaesenc xmm8, xmm8, [rax+80]
  9871. vpxor xmm4, xmm4, xmm7
  9872. vpshufd xmm5, xmm0, 78
  9873. vpxor xmm5, xmm5, xmm0
  9874. vpclmulqdq xmm6, xmm0, xmm7, 17
  9875. vaesenc xmm9, xmm9, [rax+80]
  9876. vaesenc xmm10, xmm10, [rax+80]
  9877. vpclmulqdq xmm7, xmm0, xmm7, 0
  9878. vaesenc xmm11, xmm11, [rax+80]
  9879. vaesenc xmm12, xmm12, [rax+80]
  9880. vpclmulqdq xmm4, xmm4, xmm5, 0
  9881. vaesenc xmm13, xmm13, [rax+80]
  9882. vaesenc xmm14, xmm14, [rax+80]
  9883. vaesenc xmm15, xmm15, [rax+80]
  9884. vpxor xmm1, xmm1, xmm7
  9885. vpxor xmm2, xmm2, xmm7
  9886. vpxor xmm1, xmm1, xmm6
  9887. vpxor xmm3, xmm3, xmm6
  9888. vpxor xmm1, xmm1, xmm4
  9889. vmovdqu xmm7, OWORD PTR [rsp+32]
  9890. vmovdqu xmm0, OWORD PTR [rdx+-48]
  9891. vpshufd xmm4, xmm7, 78
  9892. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9893. vaesenc xmm8, xmm8, [rax+96]
  9894. vpxor xmm4, xmm4, xmm7
  9895. vpshufd xmm5, xmm0, 78
  9896. vpxor xmm5, xmm5, xmm0
  9897. vpclmulqdq xmm6, xmm0, xmm7, 17
  9898. vaesenc xmm9, xmm9, [rax+96]
  9899. vaesenc xmm10, xmm10, [rax+96]
  9900. vpclmulqdq xmm7, xmm0, xmm7, 0
  9901. vaesenc xmm11, xmm11, [rax+96]
  9902. vaesenc xmm12, xmm12, [rax+96]
  9903. vpclmulqdq xmm4, xmm4, xmm5, 0
  9904. vaesenc xmm13, xmm13, [rax+96]
  9905. vaesenc xmm14, xmm14, [rax+96]
  9906. vaesenc xmm15, xmm15, [rax+96]
  9907. vpxor xmm1, xmm1, xmm7
  9908. vpxor xmm2, xmm2, xmm7
  9909. vpxor xmm1, xmm1, xmm6
  9910. vpxor xmm3, xmm3, xmm6
  9911. vpxor xmm1, xmm1, xmm4
  9912. vmovdqu xmm7, OWORD PTR [rsp+16]
  9913. vmovdqu xmm0, OWORD PTR [rdx+-32]
  9914. vpshufd xmm4, xmm7, 78
  9915. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9916. vaesenc xmm8, xmm8, [rax+112]
  9917. vpxor xmm4, xmm4, xmm7
  9918. vpshufd xmm5, xmm0, 78
  9919. vpxor xmm5, xmm5, xmm0
  9920. vpclmulqdq xmm6, xmm0, xmm7, 17
  9921. vaesenc xmm9, xmm9, [rax+112]
  9922. vaesenc xmm10, xmm10, [rax+112]
  9923. vpclmulqdq xmm7, xmm0, xmm7, 0
  9924. vaesenc xmm11, xmm11, [rax+112]
  9925. vaesenc xmm12, xmm12, [rax+112]
  9926. vpclmulqdq xmm4, xmm4, xmm5, 0
  9927. vaesenc xmm13, xmm13, [rax+112]
  9928. vaesenc xmm14, xmm14, [rax+112]
  9929. vaesenc xmm15, xmm15, [rax+112]
  9930. vpxor xmm1, xmm1, xmm7
  9931. vpxor xmm2, xmm2, xmm7
  9932. vpxor xmm1, xmm1, xmm6
  9933. vpxor xmm3, xmm3, xmm6
  9934. vpxor xmm1, xmm1, xmm4
  9935. vmovdqu xmm7, OWORD PTR [rsp]
  9936. vmovdqu xmm0, OWORD PTR [rdx+-16]
  9937. vpshufd xmm4, xmm7, 78
  9938. vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
  9939. vaesenc xmm8, xmm8, [rax+128]
  9940. vpxor xmm4, xmm4, xmm7
  9941. vpshufd xmm5, xmm0, 78
  9942. vpxor xmm5, xmm5, xmm0
  9943. vpclmulqdq xmm6, xmm0, xmm7, 17
  9944. vaesenc xmm9, xmm9, [rax+128]
  9945. vaesenc xmm10, xmm10, [rax+128]
  9946. vpclmulqdq xmm7, xmm0, xmm7, 0
  9947. vaesenc xmm11, xmm11, [rax+128]
  9948. vaesenc xmm12, xmm12, [rax+128]
  9949. vpclmulqdq xmm4, xmm4, xmm5, 0
  9950. vaesenc xmm13, xmm13, [rax+128]
  9951. vaesenc xmm14, xmm14, [rax+128]
  9952. vaesenc xmm15, xmm15, [rax+128]
  9953. vpxor xmm1, xmm1, xmm7
  9954. vpxor xmm2, xmm2, xmm7
  9955. vpxor xmm1, xmm1, xmm6
  9956. vpxor xmm3, xmm3, xmm6
  9957. vpxor xmm1, xmm1, xmm4
  9958. vpslldq xmm5, xmm1, 8
  9959. vpsrldq xmm1, xmm1, 8
  9960. vaesenc xmm8, xmm8, [rax+144]
  9961. vpxor xmm2, xmm2, xmm5
  9962. vpxor xmm3, xmm3, xmm1
  9963. vaesenc xmm9, xmm9, [rax+144]
  9964. vpslld xmm7, xmm2, 31
  9965. vpslld xmm4, xmm2, 30
  9966. vpslld xmm5, xmm2, 25
  9967. vaesenc xmm10, xmm10, [rax+144]
  9968. vpxor xmm7, xmm7, xmm4
  9969. vpxor xmm7, xmm7, xmm5
  9970. vaesenc xmm11, xmm11, [rax+144]
  9971. vpsrldq xmm4, xmm7, 4
  9972. vpslldq xmm7, xmm7, 12
  9973. vaesenc xmm12, xmm12, [rax+144]
  9974. vpxor xmm2, xmm2, xmm7
  9975. vpsrld xmm5, xmm2, 1
  9976. vaesenc xmm13, xmm13, [rax+144]
  9977. vpsrld xmm1, xmm2, 2
  9978. vpsrld xmm0, xmm2, 7
  9979. vaesenc xmm14, xmm14, [rax+144]
  9980. vpxor xmm5, xmm5, xmm1
  9981. vpxor xmm5, xmm5, xmm0
  9982. vaesenc xmm15, xmm15, [rax+144]
  9983. vpxor xmm5, xmm5, xmm4
  9984. vpxor xmm2, xmm2, xmm5
  9985. vpxor xmm2, xmm2, xmm3
  9986. cmp r8d, 11
  9987. vmovdqa xmm7, OWORD PTR [rax+160]
  9988. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done
  9989. vaesenc xmm8, xmm8, xmm7
  9990. vaesenc xmm9, xmm9, xmm7
  9991. vaesenc xmm10, xmm10, xmm7
  9992. vaesenc xmm11, xmm11, xmm7
  9993. vaesenc xmm12, xmm12, xmm7
  9994. vaesenc xmm13, xmm13, xmm7
  9995. vaesenc xmm14, xmm14, xmm7
  9996. vaesenc xmm15, xmm15, xmm7
  9997. vmovdqa xmm7, OWORD PTR [rax+176]
  9998. vaesenc xmm8, xmm8, xmm7
  9999. vaesenc xmm9, xmm9, xmm7
  10000. vaesenc xmm10, xmm10, xmm7
  10001. vaesenc xmm11, xmm11, xmm7
  10002. vaesenc xmm12, xmm12, xmm7
  10003. vaesenc xmm13, xmm13, xmm7
  10004. vaesenc xmm14, xmm14, xmm7
  10005. vaesenc xmm15, xmm15, xmm7
  10006. cmp r8d, 13
  10007. vmovdqa xmm7, OWORD PTR [rax+192]
  10008. jl L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done
  10009. vaesenc xmm8, xmm8, xmm7
  10010. vaesenc xmm9, xmm9, xmm7
  10011. vaesenc xmm10, xmm10, xmm7
  10012. vaesenc xmm11, xmm11, xmm7
  10013. vaesenc xmm12, xmm12, xmm7
  10014. vaesenc xmm13, xmm13, xmm7
  10015. vaesenc xmm14, xmm14, xmm7
  10016. vaesenc xmm15, xmm15, xmm7
  10017. vmovdqa xmm7, OWORD PTR [rax+208]
  10018. vaesenc xmm8, xmm8, xmm7
  10019. vaesenc xmm9, xmm9, xmm7
  10020. vaesenc xmm10, xmm10, xmm7
  10021. vaesenc xmm11, xmm11, xmm7
  10022. vaesenc xmm12, xmm12, xmm7
  10023. vaesenc xmm13, xmm13, xmm7
  10024. vaesenc xmm14, xmm14, xmm7
  10025. vaesenc xmm15, xmm15, xmm7
  10026. vmovdqa xmm7, OWORD PTR [rax+224]
  10027. L_AES_GCM_encrypt_update_avx1_aesenc_128_ghash_avx_done:
  10028. vaesenclast xmm8, xmm8, xmm7
  10029. vaesenclast xmm9, xmm9, xmm7
  10030. vmovdqu xmm0, OWORD PTR [rcx]
  10031. vmovdqu xmm1, OWORD PTR [rcx+16]
  10032. vpxor xmm8, xmm8, xmm0
  10033. vpxor xmm9, xmm9, xmm1
  10034. vmovdqu OWORD PTR [rdx], xmm8
  10035. vmovdqu OWORD PTR [rdx+16], xmm9
  10036. vaesenclast xmm10, xmm10, xmm7
  10037. vaesenclast xmm11, xmm11, xmm7
  10038. vmovdqu xmm0, OWORD PTR [rcx+32]
  10039. vmovdqu xmm1, OWORD PTR [rcx+48]
  10040. vpxor xmm10, xmm10, xmm0
  10041. vpxor xmm11, xmm11, xmm1
  10042. vmovdqu OWORD PTR [rdx+32], xmm10
  10043. vmovdqu OWORD PTR [rdx+48], xmm11
  10044. vaesenclast xmm12, xmm12, xmm7
  10045. vaesenclast xmm13, xmm13, xmm7
  10046. vmovdqu xmm0, OWORD PTR [rcx+64]
  10047. vmovdqu xmm1, OWORD PTR [rcx+80]
  10048. vpxor xmm12, xmm12, xmm0
  10049. vpxor xmm13, xmm13, xmm1
  10050. vmovdqu OWORD PTR [rdx+64], xmm12
  10051. vmovdqu OWORD PTR [rdx+80], xmm13
  10052. vaesenclast xmm14, xmm14, xmm7
  10053. vaesenclast xmm15, xmm15, xmm7
  10054. vmovdqu xmm0, OWORD PTR [rcx+96]
  10055. vmovdqu xmm1, OWORD PTR [rcx+112]
  10056. vpxor xmm14, xmm14, xmm0
  10057. vpxor xmm15, xmm15, xmm1
  10058. vmovdqu OWORD PTR [rdx+96], xmm14
  10059. vmovdqu OWORD PTR [rdx+112], xmm15
  10060. add edi, 128
  10061. cmp edi, r13d
  10062. jl L_AES_GCM_encrypt_update_avx1_ghash_128
  10063. L_AES_GCM_encrypt_update_avx1_end_128:
  10064. vmovdqa xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10065. vpshufb xmm8, xmm8, xmm4
  10066. vpshufb xmm9, xmm9, xmm4
  10067. vpshufb xmm10, xmm10, xmm4
  10068. vpshufb xmm11, xmm11, xmm4
  10069. vpxor xmm8, xmm8, xmm2
  10070. vpshufb xmm12, xmm12, xmm4
  10071. vpshufb xmm13, xmm13, xmm4
  10072. vpshufb xmm14, xmm14, xmm4
  10073. vpshufb xmm15, xmm15, xmm4
  10074. vmovdqu xmm7, OWORD PTR [rsp]
  10075. vmovdqu xmm5, OWORD PTR [rsp+16]
  10076. ; ghash_gfmul_avx
  10077. vpshufd xmm1, xmm15, 78
  10078. vpshufd xmm2, xmm7, 78
  10079. vpclmulqdq xmm3, xmm7, xmm15, 17
  10080. vpclmulqdq xmm0, xmm7, xmm15, 0
  10081. vpxor xmm1, xmm1, xmm15
  10082. vpxor xmm2, xmm2, xmm7
  10083. vpclmulqdq xmm1, xmm1, xmm2, 0
  10084. vpxor xmm1, xmm1, xmm0
  10085. vpxor xmm1, xmm1, xmm3
  10086. vmovdqa xmm4, xmm0
  10087. vmovdqa xmm6, xmm3
  10088. vpslldq xmm2, xmm1, 8
  10089. vpsrldq xmm1, xmm1, 8
  10090. vpxor xmm4, xmm4, xmm2
  10091. vpxor xmm6, xmm6, xmm1
  10092. ; ghash_gfmul_xor_avx
  10093. vpshufd xmm1, xmm14, 78
  10094. vpshufd xmm2, xmm5, 78
  10095. vpclmulqdq xmm3, xmm5, xmm14, 17
  10096. vpclmulqdq xmm0, xmm5, xmm14, 0
  10097. vpxor xmm1, xmm1, xmm14
  10098. vpxor xmm2, xmm2, xmm5
  10099. vpclmulqdq xmm1, xmm1, xmm2, 0
  10100. vpxor xmm1, xmm1, xmm0
  10101. vpxor xmm1, xmm1, xmm3
  10102. vpxor xmm4, xmm4, xmm0
  10103. vpxor xmm6, xmm6, xmm3
  10104. vpslldq xmm2, xmm1, 8
  10105. vpsrldq xmm1, xmm1, 8
  10106. vpxor xmm4, xmm4, xmm2
  10107. vpxor xmm6, xmm6, xmm1
  10108. vmovdqu xmm7, OWORD PTR [rsp+32]
  10109. vmovdqu xmm5, OWORD PTR [rsp+48]
  10110. ; ghash_gfmul_xor_avx
  10111. vpshufd xmm1, xmm13, 78
  10112. vpshufd xmm2, xmm7, 78
  10113. vpclmulqdq xmm3, xmm7, xmm13, 17
  10114. vpclmulqdq xmm0, xmm7, xmm13, 0
  10115. vpxor xmm1, xmm1, xmm13
  10116. vpxor xmm2, xmm2, xmm7
  10117. vpclmulqdq xmm1, xmm1, xmm2, 0
  10118. vpxor xmm1, xmm1, xmm0
  10119. vpxor xmm1, xmm1, xmm3
  10120. vpxor xmm4, xmm4, xmm0
  10121. vpxor xmm6, xmm6, xmm3
  10122. vpslldq xmm2, xmm1, 8
  10123. vpsrldq xmm1, xmm1, 8
  10124. vpxor xmm4, xmm4, xmm2
  10125. vpxor xmm6, xmm6, xmm1
  10126. ; ghash_gfmul_xor_avx
  10127. vpshufd xmm1, xmm12, 78
  10128. vpshufd xmm2, xmm5, 78
  10129. vpclmulqdq xmm3, xmm5, xmm12, 17
  10130. vpclmulqdq xmm0, xmm5, xmm12, 0
  10131. vpxor xmm1, xmm1, xmm12
  10132. vpxor xmm2, xmm2, xmm5
  10133. vpclmulqdq xmm1, xmm1, xmm2, 0
  10134. vpxor xmm1, xmm1, xmm0
  10135. vpxor xmm1, xmm1, xmm3
  10136. vpxor xmm4, xmm4, xmm0
  10137. vpxor xmm6, xmm6, xmm3
  10138. vpslldq xmm2, xmm1, 8
  10139. vpsrldq xmm1, xmm1, 8
  10140. vpxor xmm4, xmm4, xmm2
  10141. vpxor xmm6, xmm6, xmm1
  10142. vmovdqu xmm7, OWORD PTR [rsp+64]
  10143. vmovdqu xmm5, OWORD PTR [rsp+80]
  10144. ; ghash_gfmul_xor_avx
  10145. vpshufd xmm1, xmm11, 78
  10146. vpshufd xmm2, xmm7, 78
  10147. vpclmulqdq xmm3, xmm7, xmm11, 17
  10148. vpclmulqdq xmm0, xmm7, xmm11, 0
  10149. vpxor xmm1, xmm1, xmm11
  10150. vpxor xmm2, xmm2, xmm7
  10151. vpclmulqdq xmm1, xmm1, xmm2, 0
  10152. vpxor xmm1, xmm1, xmm0
  10153. vpxor xmm1, xmm1, xmm3
  10154. vpxor xmm4, xmm4, xmm0
  10155. vpxor xmm6, xmm6, xmm3
  10156. vpslldq xmm2, xmm1, 8
  10157. vpsrldq xmm1, xmm1, 8
  10158. vpxor xmm4, xmm4, xmm2
  10159. vpxor xmm6, xmm6, xmm1
  10160. ; ghash_gfmul_xor_avx
  10161. vpshufd xmm1, xmm10, 78
  10162. vpshufd xmm2, xmm5, 78
  10163. vpclmulqdq xmm3, xmm5, xmm10, 17
  10164. vpclmulqdq xmm0, xmm5, xmm10, 0
  10165. vpxor xmm1, xmm1, xmm10
  10166. vpxor xmm2, xmm2, xmm5
  10167. vpclmulqdq xmm1, xmm1, xmm2, 0
  10168. vpxor xmm1, xmm1, xmm0
  10169. vpxor xmm1, xmm1, xmm3
  10170. vpxor xmm4, xmm4, xmm0
  10171. vpxor xmm6, xmm6, xmm3
  10172. vpslldq xmm2, xmm1, 8
  10173. vpsrldq xmm1, xmm1, 8
  10174. vpxor xmm4, xmm4, xmm2
  10175. vpxor xmm6, xmm6, xmm1
  10176. vmovdqu xmm7, OWORD PTR [rsp+96]
  10177. vmovdqu xmm5, OWORD PTR [rsp+112]
  10178. ; ghash_gfmul_xor_avx
  10179. vpshufd xmm1, xmm9, 78
  10180. vpshufd xmm2, xmm7, 78
  10181. vpclmulqdq xmm3, xmm7, xmm9, 17
  10182. vpclmulqdq xmm0, xmm7, xmm9, 0
  10183. vpxor xmm1, xmm1, xmm9
  10184. vpxor xmm2, xmm2, xmm7
  10185. vpclmulqdq xmm1, xmm1, xmm2, 0
  10186. vpxor xmm1, xmm1, xmm0
  10187. vpxor xmm1, xmm1, xmm3
  10188. vpxor xmm4, xmm4, xmm0
  10189. vpxor xmm6, xmm6, xmm3
  10190. vpslldq xmm2, xmm1, 8
  10191. vpsrldq xmm1, xmm1, 8
  10192. vpxor xmm4, xmm4, xmm2
  10193. vpxor xmm6, xmm6, xmm1
  10194. ; ghash_gfmul_xor_avx
  10195. vpshufd xmm1, xmm8, 78
  10196. vpshufd xmm2, xmm5, 78
  10197. vpclmulqdq xmm3, xmm5, xmm8, 17
  10198. vpclmulqdq xmm0, xmm5, xmm8, 0
  10199. vpxor xmm1, xmm1, xmm8
  10200. vpxor xmm2, xmm2, xmm5
  10201. vpclmulqdq xmm1, xmm1, xmm2, 0
  10202. vpxor xmm1, xmm1, xmm0
  10203. vpxor xmm1, xmm1, xmm3
  10204. vpxor xmm4, xmm4, xmm0
  10205. vpxor xmm6, xmm6, xmm3
  10206. vpslldq xmm2, xmm1, 8
  10207. vpsrldq xmm1, xmm1, 8
  10208. vpxor xmm4, xmm4, xmm2
  10209. vpxor xmm6, xmm6, xmm1
  10210. vpslld xmm0, xmm4, 31
  10211. vpslld xmm1, xmm4, 30
  10212. vpslld xmm2, xmm4, 25
  10213. vpxor xmm0, xmm0, xmm1
  10214. vpxor xmm0, xmm0, xmm2
  10215. vmovdqa xmm1, xmm0
  10216. vpsrldq xmm1, xmm1, 4
  10217. vpslldq xmm0, xmm0, 12
  10218. vpxor xmm4, xmm4, xmm0
  10219. vpsrld xmm2, xmm4, 1
  10220. vpsrld xmm3, xmm4, 2
  10221. vpsrld xmm0, xmm4, 7
  10222. vpxor xmm2, xmm2, xmm3
  10223. vpxor xmm2, xmm2, xmm0
  10224. vpxor xmm2, xmm2, xmm1
  10225. vpxor xmm2, xmm2, xmm4
  10226. vpxor xmm6, xmm6, xmm2
  10227. vmovdqu xmm5, OWORD PTR [rsp]
  10228. L_AES_GCM_encrypt_update_avx1_done_128:
  10229. mov edx, r9d
  10230. cmp edi, edx
  10231. jge L_AES_GCM_encrypt_update_avx1_done_enc
  10232. mov r13d, r9d
  10233. and r13d, 4294967280
  10234. cmp edi, r13d
  10235. jge L_AES_GCM_encrypt_update_avx1_last_block_done
  10236. vmovdqu xmm9, OWORD PTR [r15]
  10237. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  10238. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  10239. vmovdqu OWORD PTR [r15], xmm9
  10240. vpxor xmm8, xmm8, [rax]
  10241. vaesenc xmm8, xmm8, [rax+16]
  10242. vaesenc xmm8, xmm8, [rax+32]
  10243. vaesenc xmm8, xmm8, [rax+48]
  10244. vaesenc xmm8, xmm8, [rax+64]
  10245. vaesenc xmm8, xmm8, [rax+80]
  10246. vaesenc xmm8, xmm8, [rax+96]
  10247. vaesenc xmm8, xmm8, [rax+112]
  10248. vaesenc xmm8, xmm8, [rax+128]
  10249. vaesenc xmm8, xmm8, [rax+144]
  10250. cmp r8d, 11
  10251. vmovdqa xmm9, OWORD PTR [rax+160]
  10252. jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last
  10253. vaesenc xmm8, xmm8, xmm9
  10254. vaesenc xmm8, xmm8, [rax+176]
  10255. cmp r8d, 13
  10256. vmovdqa xmm9, OWORD PTR [rax+192]
  10257. jl L_AES_GCM_encrypt_update_avx1_aesenc_block_last
  10258. vaesenc xmm8, xmm8, xmm9
  10259. vaesenc xmm8, xmm8, [rax+208]
  10260. vmovdqa xmm9, OWORD PTR [rax+224]
  10261. L_AES_GCM_encrypt_update_avx1_aesenc_block_last:
  10262. vaesenclast xmm8, xmm8, xmm9
  10263. vmovdqu xmm9, OWORD PTR [r11+rdi]
  10264. vpxor xmm8, xmm8, xmm9
  10265. vmovdqu OWORD PTR [r10+rdi], xmm8
  10266. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10267. vpxor xmm6, xmm6, xmm8
  10268. add edi, 16
  10269. cmp edi, r13d
  10270. jge L_AES_GCM_encrypt_update_avx1_last_block_ghash
  10271. L_AES_GCM_encrypt_update_avx1_last_block_start:
  10272. vmovdqu xmm13, OWORD PTR [r11+rdi]
  10273. vmovdqu xmm9, OWORD PTR [r15]
  10274. vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
  10275. vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
  10276. vmovdqu OWORD PTR [r15], xmm9
  10277. vpxor xmm8, xmm8, [rax]
  10278. vpclmulqdq xmm10, xmm6, xmm5, 16
  10279. vaesenc xmm8, xmm8, [rax+16]
  10280. vaesenc xmm8, xmm8, [rax+32]
  10281. vpclmulqdq xmm11, xmm6, xmm5, 1
  10282. vaesenc xmm8, xmm8, [rax+48]
  10283. vaesenc xmm8, xmm8, [rax+64]
  10284. vpclmulqdq xmm12, xmm6, xmm5, 0
  10285. vaesenc xmm8, xmm8, [rax+80]
  10286. vpclmulqdq xmm1, xmm6, xmm5, 17
  10287. vaesenc xmm8, xmm8, [rax+96]
  10288. vpxor xmm10, xmm10, xmm11
  10289. vpslldq xmm2, xmm10, 8
  10290. vpsrldq xmm10, xmm10, 8
  10291. vaesenc xmm8, xmm8, [rax+112]
  10292. vpxor xmm2, xmm2, xmm12
  10293. vpxor xmm3, xmm1, xmm10
  10294. vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
  10295. vpclmulqdq xmm11, xmm2, xmm0, 16
  10296. vaesenc xmm8, xmm8, [rax+128]
  10297. vpshufd xmm10, xmm2, 78
  10298. vpxor xmm10, xmm10, xmm11
  10299. vpclmulqdq xmm11, xmm10, xmm0, 16
  10300. vaesenc xmm8, xmm8, [rax+144]
  10301. vpshufd xmm10, xmm10, 78
  10302. vpxor xmm10, xmm10, xmm11
  10303. vpxor xmm6, xmm10, xmm3
  10304. cmp r8d, 11
  10305. vmovdqa xmm9, OWORD PTR [rax+160]
  10306. jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last
  10307. vaesenc xmm8, xmm8, xmm9
  10308. vaesenc xmm8, xmm8, [rax+176]
  10309. cmp r8d, 13
  10310. vmovdqa xmm9, OWORD PTR [rax+192]
  10311. jl L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last
  10312. vaesenc xmm8, xmm8, xmm9
  10313. vaesenc xmm8, xmm8, [rax+208]
  10314. vmovdqa xmm9, OWORD PTR [rax+224]
  10315. L_AES_GCM_encrypt_update_avx1_aesenc_gfmul_last:
  10316. vaesenclast xmm8, xmm8, xmm9
  10317. vmovdqa xmm0, xmm13
  10318. vpxor xmm8, xmm8, xmm0
  10319. vmovdqu OWORD PTR [r10+rdi], xmm8
  10320. vpshufb xmm8, xmm8, OWORD PTR L_avx1_aes_gcm_bswap_mask
  10321. add edi, 16
  10322. vpxor xmm6, xmm6, xmm8
  10323. cmp edi, r13d
  10324. jl L_AES_GCM_encrypt_update_avx1_last_block_start
  10325. L_AES_GCM_encrypt_update_avx1_last_block_ghash:
  10326. ; ghash_gfmul_red_avx
  10327. vpshufd xmm9, xmm5, 78
  10328. vpshufd xmm10, xmm6, 78
  10329. vpclmulqdq xmm11, xmm6, xmm5, 17
  10330. vpclmulqdq xmm8, xmm6, xmm5, 0
  10331. vpxor xmm9, xmm9, xmm5
  10332. vpxor xmm10, xmm10, xmm6
  10333. vpclmulqdq xmm9, xmm9, xmm10, 0
  10334. vpxor xmm9, xmm9, xmm8
  10335. vpxor xmm9, xmm9, xmm11
  10336. vpslldq xmm10, xmm9, 8
  10337. vpsrldq xmm9, xmm9, 8
  10338. vpxor xmm8, xmm8, xmm10
  10339. vpxor xmm6, xmm11, xmm9
  10340. vpslld xmm12, xmm8, 31
  10341. vpslld xmm13, xmm8, 30
  10342. vpslld xmm14, xmm8, 25
  10343. vpxor xmm12, xmm12, xmm13
  10344. vpxor xmm12, xmm12, xmm14
  10345. vpsrldq xmm13, xmm12, 4
  10346. vpslldq xmm12, xmm12, 12
  10347. vpxor xmm8, xmm8, xmm12
  10348. vpsrld xmm14, xmm8, 1
  10349. vpsrld xmm10, xmm8, 2
  10350. vpsrld xmm9, xmm8, 7
  10351. vpxor xmm14, xmm14, xmm10
  10352. vpxor xmm14, xmm14, xmm9
  10353. vpxor xmm14, xmm14, xmm13
  10354. vpxor xmm14, xmm14, xmm8
  10355. vpxor xmm6, xmm6, xmm14
  10356. L_AES_GCM_encrypt_update_avx1_last_block_done:
  10357. L_AES_GCM_encrypt_update_avx1_done_enc:
  10358. vmovdqa OWORD PTR [r12], xmm6
  10359. vzeroupper
  10360. add rsp, 160
  10361. pop rdi
  10362. pop r15
  10363. pop r14
  10364. pop r12
  10365. pop r13
  10366. ret
  10367. AES_GCM_encrypt_update_avx1 ENDP
  10368. _text ENDS
  _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_encrypt_final_avx1
  ;
  ; Finishes a GCM tag: folds the two bit-lengths into the running GHASH
  ; value, multiplies by the hash key, byte-swaps, XORs with a 16-byte
  ; mask block and writes the requested number of tag bytes.
  ;
  ; ABI: Microsoft x64 (arguments in rcx/rdx/r8/r9, then on the stack
  ;      above the 32-byte shadow space).
  ; In:  rcx        -> 16-byte block read with vmovdqa (must be 16-byte
  ;                    aligned); presumably the running GHASH state
  ;      rdx        -> output buffer receiving the tag bytes
  ;      r8d        =  number of tag bytes to store (16 takes the
  ;                    single-store fast path; 0 is not guarded)
  ;      r9d        =  byte count placed in the low qword of the length
  ;                    block (multiplied by 8 here)
  ;      [rsp+40]   =  byte count placed in the high qword (as seen by
  ;                    the caller; read at [rsp+64] after the 3 pushes)
  ;      [rsp+48]   -> hash key H, 16-byte aligned (vmovdqa)
  ;      [rsp+56]   -> 16-byte aligned block XORed into the final tag;
  ;                    presumably the encrypted initial counter
  ; Out: tag bytes written through rdx; no value returned in rax.
  ; Clobbers: rax, rcx, rdx, r9-r11, xmm0, xmm1, xmm4, xmm5, flags.
  ;
  ; xmm6 and xmm8-xmm14 are non-volatile under the Microsoft x64
  ; convention, so they are spilled to the frame on entry and reloaded
  ; before returning.
  ;
  ; Frame (144 bytes):
  ;      [rsp+0   .. rsp+15 ]  scratch copy of the tag for byte-wise store
  ;      [rsp+16  .. rsp+143]  saved xmm6, xmm8..xmm14
  ;-----------------------------------------------------------------------
  AES_GCM_encrypt_final_avx1 PROC
          push r13
          push r12
          push r14
          mov rax, rcx
          mov r10d, r9d
          mov r9, rdx
          ; Stack arguments: 3 pushes (24) + return address (8) + shadow (32) = 64
          mov r11d, DWORD PTR [rsp+64]
          mov r12, QWORD PTR [rsp+72]
          mov r14, QWORD PTR [rsp+80]
          sub rsp, 144
          ; Preserve the non-volatile XMM registers this routine overwrites
          vmovdqu OWORD PTR [rsp+16], xmm6
          vmovdqu OWORD PTR [rsp+32], xmm8
          vmovdqu OWORD PTR [rsp+48], xmm9
          vmovdqu OWORD PTR [rsp+64], xmm10
          vmovdqu OWORD PTR [rsp+80], xmm11
          vmovdqu OWORD PTR [rsp+96], xmm12
          vmovdqu OWORD PTR [rsp+112], xmm13
          vmovdqu OWORD PTR [rsp+128], xmm14
          vmovdqa xmm4, OWORD PTR [rax]
          vmovdqa xmm5, OWORD PTR [r12]
          vmovdqa xmm6, OWORD PTR [r14]
          ; xmm5 = (H << 1), conditionally XORed with the reduction constant
          ; when the top bit of H was set
          vpsrlq xmm9, xmm5, 63
          vpsllq xmm8, xmm5, 1
          vpslldq xmm9, xmm9, 8
          vpor xmm8, xmm8, xmm9
          vpshufd xmm5, xmm5, 255
          vpsrad xmm5, xmm5, 31
          vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
          vpxor xmm5, xmm5, xmm8
          ; Build the length block: both byte counts converted to bit counts
          mov edx, r10d
          mov ecx, r11d
          shl rdx, 3
          shl rcx, 3
          vmovq xmm0, rdx
          vmovq xmm1, rcx
          vpunpcklqdq xmm0, xmm0, xmm1
          vpxor xmm4, xmm4, xmm0
          ; ghash_gfmul_red_avx
          vpshufd xmm9, xmm5, 78
          vpshufd xmm10, xmm4, 78
          vpclmulqdq xmm11, xmm4, xmm5, 17
          vpclmulqdq xmm8, xmm4, xmm5, 0
          vpxor xmm9, xmm9, xmm5
          vpxor xmm10, xmm10, xmm4
          vpclmulqdq xmm9, xmm9, xmm10, 0
          vpxor xmm9, xmm9, xmm8
          vpxor xmm9, xmm9, xmm11
          vpslldq xmm10, xmm9, 8
          vpsrldq xmm9, xmm9, 8
          vpxor xmm8, xmm8, xmm10
          vpxor xmm4, xmm11, xmm9
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm4, xmm4, xmm14
          vpshufb xmm4, xmm4, OWORD PTR L_avx1_aes_gcm_bswap_mask
          ; xmm0 = final 16-byte tag
          vpxor xmm0, xmm4, xmm6
          cmp r8d, 16
          je L_AES_GCM_encrypt_final_avx1_store_tag_16
          ; Partial tag: spill to the scratch slot and copy r8d bytes
          xor rcx, rcx
          vmovdqu OWORD PTR [rsp], xmm0
  L_AES_GCM_encrypt_final_avx1_store_tag_loop:
          movzx r13d, BYTE PTR [rsp+rcx]
          mov BYTE PTR [r9+rcx], r13b
          inc ecx
          cmp ecx, r8d
          jne L_AES_GCM_encrypt_final_avx1_store_tag_loop
          jmp L_AES_GCM_encrypt_final_avx1_store_tag_done
  L_AES_GCM_encrypt_final_avx1_store_tag_16:
          vmovdqu OWORD PTR [r9], xmm0
  L_AES_GCM_encrypt_final_avx1_store_tag_done:
          ; Restore the caller's non-volatile XMM registers
          vmovdqu xmm6, OWORD PTR [rsp+16]
          vmovdqu xmm8, OWORD PTR [rsp+32]
          vmovdqu xmm9, OWORD PTR [rsp+48]
          vmovdqu xmm10, OWORD PTR [rsp+64]
          vmovdqu xmm11, OWORD PTR [rsp+80]
          vmovdqu xmm12, OWORD PTR [rsp+96]
          vmovdqu xmm13, OWORD PTR [rsp+112]
          vmovdqu xmm14, OWORD PTR [rsp+128]
          vzeroupper
          add rsp, 144
          pop r14
          pop r12
          pop r13
          ret
  AES_GCM_encrypt_final_avx1 ENDP
  _text ENDS
  _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_decrypt_update_avx1
  ;
  ; Processes whole 16-byte blocks of input: each block is folded into the
  ; running GHASH value and XORed with the AES encryption of the next
  ; counter value. Blocks are handled eight at a time (128 bytes) while
  ; at least 128 bytes are available, then one at a time. Any trailing
  ; partial block (byte count mod 16) is NOT processed here.
  ;
  ; ABI: Microsoft x64.
  ; In:  rcx        -> AES round keys, 16 bytes apart, read at [rcx+16*i];
  ;                    16-byte aligned (vmovdqa loads)
  ;      edx        =  round count; values < 11 use 10 rounds, < 13 use
  ;                    12 rounds, otherwise 14 rounds
  ;      r8         -> output buffer
  ;      r9         -> input buffer
  ;      [rsp+40]   =  byte count (caller view; read at [rsp+80] after
  ;                    the 5 pushes)
  ;      [rsp+48]   -> running GHASH value, 16-byte aligned; read on
  ;                    entry and written back on exit
  ;      [rsp+56]   -> hash key H, 16-byte aligned
  ;      [rsp+64]   -> 16-byte counter block; read, incremented with
  ;                    vpaddd and written back for every block
  ; Out: output buffer, GHASH value and counter updated in place.
  ; Clobbers: rax, rcx, rdx, r8-r11, xmm0-xmm5, flags.
  ;
  ; xmm6-xmm15 are non-volatile under the Microsoft x64 convention, so
  ; they are spilled to the frame on entry and reloaded before returning.
  ;
  ; Frame (328 bytes):
  ;      [rsp+0   .. rsp+127]  H^1 .. H^8 (only written on the 128-byte path)
  ;      [rsp+128 .. rsp+167]  unused
  ;      [rsp+168 .. rsp+327]  saved xmm6..xmm15
  ;
  ; NOTE(review): the byte-count comparisons use signed conditions
  ; (jl / jge), so a count with bit 31 set falls straight through to the
  ; exit without processing anything - confirm callers never pass one.
  ;-----------------------------------------------------------------------
  AES_GCM_decrypt_update_avx1 PROC
          push r13
          push r12
          push r14
          push r15
          push rdi
          mov rax, rcx
          mov r10, r8
          mov r8d, edx
          mov r11, r9
          ; Stack arguments: 5 pushes (40) + return address (8) + shadow (32) = 80
          mov r9d, DWORD PTR [rsp+80]
          mov r12, QWORD PTR [rsp+88]
          mov r14, QWORD PTR [rsp+96]
          mov r15, QWORD PTR [rsp+104]
          sub rsp, 328
          ; Preserve the non-volatile XMM registers this routine overwrites
          vmovdqu OWORD PTR [rsp+168], xmm6
          vmovdqu OWORD PTR [rsp+184], xmm7
          vmovdqu OWORD PTR [rsp+200], xmm8
          vmovdqu OWORD PTR [rsp+216], xmm9
          vmovdqu OWORD PTR [rsp+232], xmm10
          vmovdqu OWORD PTR [rsp+248], xmm11
          vmovdqu OWORD PTR [rsp+264], xmm12
          vmovdqu OWORD PTR [rsp+280], xmm13
          vmovdqu OWORD PTR [rsp+296], xmm14
          vmovdqu OWORD PTR [rsp+312], xmm15
          vmovdqa xmm6, OWORD PTR [r12]
          vmovdqa xmm5, OWORD PTR [r14]
          ; xmm5 = (H << 1), conditionally XORed with the reduction constant
          ; when the top bit of H was set
          vpsrlq xmm9, xmm5, 63
          vpsllq xmm8, xmm5, 1
          vpslldq xmm9, xmm9, 8
          vpor xmm8, xmm8, xmm9
          vpshufd xmm5, xmm5, 255
          vpsrad xmm5, xmm5, 31
          vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
          vpxor xmm5, xmm5, xmm8
          ; edi = byte offset of the next block to process
          xor edi, edi
          cmp r9d, 128
          mov r13d, r9d
          jl L_AES_GCM_decrypt_update_avx1_done_128
          ; r13d = byte count rounded down to a multiple of 128
          and r13d, 4294967168
          ; xmm2 carries the GHASH value through the 128-byte loop
          vmovdqa xmm2, xmm6
          ; H ^ 1
          vmovdqu OWORD PTR [rsp], xmm5
          ; H ^ 2
          vpclmulqdq xmm8, xmm5, xmm5, 0
          vpclmulqdq xmm0, xmm5, xmm5, 17
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm0, xmm0, xmm14
          vmovdqu OWORD PTR [rsp+16], xmm0
          ; H ^ 3
          ; ghash_gfmul_red_avx
          vpshufd xmm9, xmm5, 78
          vpshufd xmm10, xmm0, 78
          vpclmulqdq xmm11, xmm0, xmm5, 17
          vpclmulqdq xmm8, xmm0, xmm5, 0
          vpxor xmm9, xmm9, xmm5
          vpxor xmm10, xmm10, xmm0
          vpclmulqdq xmm9, xmm9, xmm10, 0
          vpxor xmm9, xmm9, xmm8
          vpxor xmm9, xmm9, xmm11
          vpslldq xmm10, xmm9, 8
          vpsrldq xmm9, xmm9, 8
          vpxor xmm8, xmm8, xmm10
          vpxor xmm1, xmm11, xmm9
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm1, xmm1, xmm14
          vmovdqu OWORD PTR [rsp+32], xmm1
          ; H ^ 4
          vpclmulqdq xmm8, xmm0, xmm0, 0
          vpclmulqdq xmm3, xmm0, xmm0, 17
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm3, xmm3, xmm14
          vmovdqu OWORD PTR [rsp+48], xmm3
          ; H ^ 5
          ; ghash_gfmul_red_avx
          vpshufd xmm9, xmm0, 78
          vpshufd xmm10, xmm1, 78
          vpclmulqdq xmm11, xmm1, xmm0, 17
          vpclmulqdq xmm8, xmm1, xmm0, 0
          vpxor xmm9, xmm9, xmm0
          vpxor xmm10, xmm10, xmm1
          vpclmulqdq xmm9, xmm9, xmm10, 0
          vpxor xmm9, xmm9, xmm8
          vpxor xmm9, xmm9, xmm11
          vpslldq xmm10, xmm9, 8
          vpsrldq xmm9, xmm9, 8
          vpxor xmm8, xmm8, xmm10
          vpxor xmm7, xmm11, xmm9
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm7, xmm7, xmm14
          vmovdqu OWORD PTR [rsp+64], xmm7
          ; H ^ 6
          vpclmulqdq xmm8, xmm1, xmm1, 0
          vpclmulqdq xmm7, xmm1, xmm1, 17
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm7, xmm7, xmm14
          vmovdqu OWORD PTR [rsp+80], xmm7
          ; H ^ 7
          ; ghash_gfmul_red_avx
          vpshufd xmm9, xmm1, 78
          vpshufd xmm10, xmm3, 78
          vpclmulqdq xmm11, xmm3, xmm1, 17
          vpclmulqdq xmm8, xmm3, xmm1, 0
          vpxor xmm9, xmm9, xmm1
          vpxor xmm10, xmm10, xmm3
          vpclmulqdq xmm9, xmm9, xmm10, 0
          vpxor xmm9, xmm9, xmm8
          vpxor xmm9, xmm9, xmm11
          vpslldq xmm10, xmm9, 8
          vpsrldq xmm9, xmm9, 8
          vpxor xmm8, xmm8, xmm10
          vpxor xmm7, xmm11, xmm9
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm7, xmm7, xmm14
          vmovdqu OWORD PTR [rsp+96], xmm7
          ; H ^ 8
          vpclmulqdq xmm8, xmm3, xmm3, 0
          vpclmulqdq xmm7, xmm3, xmm3, 17
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm7, xmm7, xmm14
          vmovdqu OWORD PTR [rsp+112], xmm7
  L_AES_GCM_decrypt_update_avx1_ghash_128:
          ; rcx -> current 128 bytes of input, rdx -> matching output
          lea rcx, QWORD PTR [r11+rdi]
          lea rdx, QWORD PTR [r10+rdi]
          ; Derive eight consecutive counter blocks and advance the stored counter by 8
          vmovdqu xmm0, OWORD PTR [r15]
          vmovdqa xmm1, OWORD PTR L_avx1_aes_gcm_bswap_epi64
          vpshufb xmm8, xmm0, xmm1
          vpaddd xmm9, xmm0, OWORD PTR L_avx1_aes_gcm_one
          vpshufb xmm9, xmm9, xmm1
          vpaddd xmm10, xmm0, OWORD PTR L_avx1_aes_gcm_two
          vpshufb xmm10, xmm10, xmm1
          vpaddd xmm11, xmm0, OWORD PTR L_avx1_aes_gcm_three
          vpshufb xmm11, xmm11, xmm1
          vpaddd xmm12, xmm0, OWORD PTR L_avx1_aes_gcm_four
          vpshufb xmm12, xmm12, xmm1
          vpaddd xmm13, xmm0, OWORD PTR L_avx1_aes_gcm_five
          vpshufb xmm13, xmm13, xmm1
          vpaddd xmm14, xmm0, OWORD PTR L_avx1_aes_gcm_six
          vpshufb xmm14, xmm14, xmm1
          vpaddd xmm15, xmm0, OWORD PTR L_avx1_aes_gcm_seven
          vpshufb xmm15, xmm15, xmm1
          vpaddd xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_eight
          vmovdqa xmm7, OWORD PTR [rax]
          vmovdqu OWORD PTR [r15], xmm0
          ; Round 0: XOR with the first round key
          vpxor xmm8, xmm8, xmm7
          vpxor xmm9, xmm9, xmm7
          vpxor xmm10, xmm10, xmm7
          vpxor xmm11, xmm11, xmm7
          vpxor xmm12, xmm12, xmm7
          vpxor xmm13, xmm13, xmm7
          vpxor xmm14, xmm14, xmm7
          vpxor xmm15, xmm15, xmm7
          ; AES rounds 1..8 interleaved with GHASH of the eight input
          ; blocks: block i is multiplied by H^(8-i); block 0 is first
          ; XORed with the running GHASH value in xmm2
          vmovdqu xmm7, OWORD PTR [rsp+112]
          vmovdqu xmm0, OWORD PTR [rcx]
          vaesenc xmm8, xmm8, [rax+16]
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vpxor xmm0, xmm0, xmm2
          vpshufd xmm1, xmm7, 78
          vpshufd xmm5, xmm0, 78
          vpxor xmm1, xmm1, xmm7
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm3, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+16]
          vaesenc xmm10, xmm10, [rax+16]
          vpclmulqdq xmm2, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+16]
          vaesenc xmm12, xmm12, [rax+16]
          vpclmulqdq xmm1, xmm1, xmm5, 0
          vaesenc xmm13, xmm13, [rax+16]
          vaesenc xmm14, xmm14, [rax+16]
          vaesenc xmm15, xmm15, [rax+16]
          vpxor xmm1, xmm1, xmm2
          vpxor xmm1, xmm1, xmm3
          vmovdqu xmm7, OWORD PTR [rsp+96]
          vmovdqu xmm0, OWORD PTR [rcx+16]
          vpshufd xmm4, xmm7, 78
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vaesenc xmm8, xmm8, [rax+32]
          vpxor xmm4, xmm4, xmm7
          vpshufd xmm5, xmm0, 78
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm6, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+32]
          vaesenc xmm10, xmm10, [rax+32]
          vpclmulqdq xmm7, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+32]
          vaesenc xmm12, xmm12, [rax+32]
          vpclmulqdq xmm4, xmm4, xmm5, 0
          vaesenc xmm13, xmm13, [rax+32]
          vaesenc xmm14, xmm14, [rax+32]
          vaesenc xmm15, xmm15, [rax+32]
          vpxor xmm1, xmm1, xmm7
          vpxor xmm2, xmm2, xmm7
          vpxor xmm1, xmm1, xmm6
          vpxor xmm3, xmm3, xmm6
          vpxor xmm1, xmm1, xmm4
          vmovdqu xmm7, OWORD PTR [rsp+80]
          vmovdqu xmm0, OWORD PTR [rcx+32]
          vpshufd xmm4, xmm7, 78
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vaesenc xmm8, xmm8, [rax+48]
          vpxor xmm4, xmm4, xmm7
          vpshufd xmm5, xmm0, 78
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm6, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+48]
          vaesenc xmm10, xmm10, [rax+48]
          vpclmulqdq xmm7, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+48]
          vaesenc xmm12, xmm12, [rax+48]
          vpclmulqdq xmm4, xmm4, xmm5, 0
          vaesenc xmm13, xmm13, [rax+48]
          vaesenc xmm14, xmm14, [rax+48]
          vaesenc xmm15, xmm15, [rax+48]
          vpxor xmm1, xmm1, xmm7
          vpxor xmm2, xmm2, xmm7
          vpxor xmm1, xmm1, xmm6
          vpxor xmm3, xmm3, xmm6
          vpxor xmm1, xmm1, xmm4
          vmovdqu xmm7, OWORD PTR [rsp+64]
          vmovdqu xmm0, OWORD PTR [rcx+48]
          vpshufd xmm4, xmm7, 78
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vaesenc xmm8, xmm8, [rax+64]
          vpxor xmm4, xmm4, xmm7
          vpshufd xmm5, xmm0, 78
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm6, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+64]
          vaesenc xmm10, xmm10, [rax+64]
          vpclmulqdq xmm7, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+64]
          vaesenc xmm12, xmm12, [rax+64]
          vpclmulqdq xmm4, xmm4, xmm5, 0
          vaesenc xmm13, xmm13, [rax+64]
          vaesenc xmm14, xmm14, [rax+64]
          vaesenc xmm15, xmm15, [rax+64]
          vpxor xmm1, xmm1, xmm7
          vpxor xmm2, xmm2, xmm7
          vpxor xmm1, xmm1, xmm6
          vpxor xmm3, xmm3, xmm6
          vpxor xmm1, xmm1, xmm4
          vmovdqu xmm7, OWORD PTR [rsp+48]
          vmovdqu xmm0, OWORD PTR [rcx+64]
          vpshufd xmm4, xmm7, 78
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vaesenc xmm8, xmm8, [rax+80]
          vpxor xmm4, xmm4, xmm7
          vpshufd xmm5, xmm0, 78
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm6, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+80]
          vaesenc xmm10, xmm10, [rax+80]
          vpclmulqdq xmm7, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+80]
          vaesenc xmm12, xmm12, [rax+80]
          vpclmulqdq xmm4, xmm4, xmm5, 0
          vaesenc xmm13, xmm13, [rax+80]
          vaesenc xmm14, xmm14, [rax+80]
          vaesenc xmm15, xmm15, [rax+80]
          vpxor xmm1, xmm1, xmm7
          vpxor xmm2, xmm2, xmm7
          vpxor xmm1, xmm1, xmm6
          vpxor xmm3, xmm3, xmm6
          vpxor xmm1, xmm1, xmm4
          vmovdqu xmm7, OWORD PTR [rsp+32]
          vmovdqu xmm0, OWORD PTR [rcx+80]
          vpshufd xmm4, xmm7, 78
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vaesenc xmm8, xmm8, [rax+96]
          vpxor xmm4, xmm4, xmm7
          vpshufd xmm5, xmm0, 78
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm6, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+96]
          vaesenc xmm10, xmm10, [rax+96]
          vpclmulqdq xmm7, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+96]
          vaesenc xmm12, xmm12, [rax+96]
          vpclmulqdq xmm4, xmm4, xmm5, 0
          vaesenc xmm13, xmm13, [rax+96]
          vaesenc xmm14, xmm14, [rax+96]
          vaesenc xmm15, xmm15, [rax+96]
          vpxor xmm1, xmm1, xmm7
          vpxor xmm2, xmm2, xmm7
          vpxor xmm1, xmm1, xmm6
          vpxor xmm3, xmm3, xmm6
          vpxor xmm1, xmm1, xmm4
          vmovdqu xmm7, OWORD PTR [rsp+16]
          vmovdqu xmm0, OWORD PTR [rcx+96]
          vpshufd xmm4, xmm7, 78
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vaesenc xmm8, xmm8, [rax+112]
          vpxor xmm4, xmm4, xmm7
          vpshufd xmm5, xmm0, 78
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm6, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+112]
          vaesenc xmm10, xmm10, [rax+112]
          vpclmulqdq xmm7, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+112]
          vaesenc xmm12, xmm12, [rax+112]
          vpclmulqdq xmm4, xmm4, xmm5, 0
          vaesenc xmm13, xmm13, [rax+112]
          vaesenc xmm14, xmm14, [rax+112]
          vaesenc xmm15, xmm15, [rax+112]
          vpxor xmm1, xmm1, xmm7
          vpxor xmm2, xmm2, xmm7
          vpxor xmm1, xmm1, xmm6
          vpxor xmm3, xmm3, xmm6
          vpxor xmm1, xmm1, xmm4
          vmovdqu xmm7, OWORD PTR [rsp]
          vmovdqu xmm0, OWORD PTR [rcx+112]
          vpshufd xmm4, xmm7, 78
          vpshufb xmm0, xmm0, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vaesenc xmm8, xmm8, [rax+128]
          vpxor xmm4, xmm4, xmm7
          vpshufd xmm5, xmm0, 78
          vpxor xmm5, xmm5, xmm0
          vpclmulqdq xmm6, xmm0, xmm7, 17
          vaesenc xmm9, xmm9, [rax+128]
          vaesenc xmm10, xmm10, [rax+128]
          vpclmulqdq xmm7, xmm0, xmm7, 0
          vaesenc xmm11, xmm11, [rax+128]
          vaesenc xmm12, xmm12, [rax+128]
          vpclmulqdq xmm4, xmm4, xmm5, 0
          vaesenc xmm13, xmm13, [rax+128]
          vaesenc xmm14, xmm14, [rax+128]
          vaesenc xmm15, xmm15, [rax+128]
          vpxor xmm1, xmm1, xmm7
          vpxor xmm2, xmm2, xmm7
          vpxor xmm1, xmm1, xmm6
          vpxor xmm3, xmm3, xmm6
          vpxor xmm1, xmm1, xmm4
          ; Fold the middle term into the low/high halves and reduce,
          ; interleaved with AES round 9; the new GHASH value ends in xmm2
          vpslldq xmm5, xmm1, 8
          vpsrldq xmm1, xmm1, 8
          vaesenc xmm8, xmm8, [rax+144]
          vpxor xmm2, xmm2, xmm5
          vpxor xmm3, xmm3, xmm1
          vaesenc xmm9, xmm9, [rax+144]
          vpslld xmm7, xmm2, 31
          vpslld xmm4, xmm2, 30
          vpslld xmm5, xmm2, 25
          vaesenc xmm10, xmm10, [rax+144]
          vpxor xmm7, xmm7, xmm4
          vpxor xmm7, xmm7, xmm5
          vaesenc xmm11, xmm11, [rax+144]
          vpsrldq xmm4, xmm7, 4
          vpslldq xmm7, xmm7, 12
          vaesenc xmm12, xmm12, [rax+144]
          vpxor xmm2, xmm2, xmm7
          vpsrld xmm5, xmm2, 1
          vaesenc xmm13, xmm13, [rax+144]
          vpsrld xmm1, xmm2, 2
          vpsrld xmm0, xmm2, 7
          vaesenc xmm14, xmm14, [rax+144]
          vpxor xmm5, xmm5, xmm1
          vpxor xmm5, xmm5, xmm0
          vaesenc xmm15, xmm15, [rax+144]
          vpxor xmm5, xmm5, xmm4
          vpxor xmm2, xmm2, xmm5
          vpxor xmm2, xmm2, xmm3
          ; Extra rounds for the larger round counts; xmm7 always holds
          ; the last round key when the final label is reached
          cmp r8d, 11
          vmovdqa xmm7, OWORD PTR [rax+160]
          jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done
          vaesenc xmm8, xmm8, xmm7
          vaesenc xmm9, xmm9, xmm7
          vaesenc xmm10, xmm10, xmm7
          vaesenc xmm11, xmm11, xmm7
          vaesenc xmm12, xmm12, xmm7
          vaesenc xmm13, xmm13, xmm7
          vaesenc xmm14, xmm14, xmm7
          vaesenc xmm15, xmm15, xmm7
          vmovdqa xmm7, OWORD PTR [rax+176]
          vaesenc xmm8, xmm8, xmm7
          vaesenc xmm9, xmm9, xmm7
          vaesenc xmm10, xmm10, xmm7
          vaesenc xmm11, xmm11, xmm7
          vaesenc xmm12, xmm12, xmm7
          vaesenc xmm13, xmm13, xmm7
          vaesenc xmm14, xmm14, xmm7
          vaesenc xmm15, xmm15, xmm7
          cmp r8d, 13
          vmovdqa xmm7, OWORD PTR [rax+192]
          jl L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done
          vaesenc xmm8, xmm8, xmm7
          vaesenc xmm9, xmm9, xmm7
          vaesenc xmm10, xmm10, xmm7
          vaesenc xmm11, xmm11, xmm7
          vaesenc xmm12, xmm12, xmm7
          vaesenc xmm13, xmm13, xmm7
          vaesenc xmm14, xmm14, xmm7
          vaesenc xmm15, xmm15, xmm7
          vmovdqa xmm7, OWORD PTR [rax+208]
          vaesenc xmm8, xmm8, xmm7
          vaesenc xmm9, xmm9, xmm7
          vaesenc xmm10, xmm10, xmm7
          vaesenc xmm11, xmm11, xmm7
          vaesenc xmm12, xmm12, xmm7
          vaesenc xmm13, xmm13, xmm7
          vaesenc xmm14, xmm14, xmm7
          vaesenc xmm15, xmm15, xmm7
          vmovdqa xmm7, OWORD PTR [rax+224]
  L_AES_GCM_decrypt_update_avx1_aesenc_128_ghash_avx_done:
          ; Final round, then XOR each keystream block with its input block
          vaesenclast xmm8, xmm8, xmm7
          vaesenclast xmm9, xmm9, xmm7
          vmovdqu xmm0, OWORD PTR [rcx]
          vmovdqu xmm1, OWORD PTR [rcx+16]
          vpxor xmm8, xmm8, xmm0
          vpxor xmm9, xmm9, xmm1
          vmovdqu OWORD PTR [rdx], xmm8
          vmovdqu OWORD PTR [rdx+16], xmm9
          vaesenclast xmm10, xmm10, xmm7
          vaesenclast xmm11, xmm11, xmm7
          vmovdqu xmm0, OWORD PTR [rcx+32]
          vmovdqu xmm1, OWORD PTR [rcx+48]
          vpxor xmm10, xmm10, xmm0
          vpxor xmm11, xmm11, xmm1
          vmovdqu OWORD PTR [rdx+32], xmm10
          vmovdqu OWORD PTR [rdx+48], xmm11
          vaesenclast xmm12, xmm12, xmm7
          vaesenclast xmm13, xmm13, xmm7
          vmovdqu xmm0, OWORD PTR [rcx+64]
          vmovdqu xmm1, OWORD PTR [rcx+80]
          vpxor xmm12, xmm12, xmm0
          vpxor xmm13, xmm13, xmm1
          vmovdqu OWORD PTR [rdx+64], xmm12
          vmovdqu OWORD PTR [rdx+80], xmm13
          vaesenclast xmm14, xmm14, xmm7
          vaesenclast xmm15, xmm15, xmm7
          vmovdqu xmm0, OWORD PTR [rcx+96]
          vmovdqu xmm1, OWORD PTR [rcx+112]
          vpxor xmm14, xmm14, xmm0
          vpxor xmm15, xmm15, xmm1
          vmovdqu OWORD PTR [rdx+96], xmm14
          vmovdqu OWORD PTR [rdx+112], xmm15
          add edi, 128
          cmp edi, r13d
          jl L_AES_GCM_decrypt_update_avx1_ghash_128
          ; Hand the GHASH value and H^1 back to the single-block path
          vmovdqa xmm6, xmm2
          vmovdqu xmm5, OWORD PTR [rsp]
  L_AES_GCM_decrypt_update_avx1_done_128:
          mov edx, r9d
          cmp edi, edx
          jge L_AES_GCM_decrypt_update_avx1_done_dec
          ; r13d = byte count rounded down to a multiple of 16
          mov r13d, r9d
          and r13d, 4294967280
          cmp edi, r13d
          jge L_AES_GCM_decrypt_update_avx1_last_block_done
  L_AES_GCM_decrypt_update_avx1_last_block_start:
          ; One block: xmm13 keeps the raw input block, xmm1 = byte-swapped
          ; input XOR running GHASH value
          vmovdqu xmm13, OWORD PTR [r11+rdi]
          vmovdqa xmm0, xmm5
          vpshufb xmm1, xmm13, OWORD PTR L_avx1_aes_gcm_bswap_mask
          vpxor xmm1, xmm1, xmm6
          ; Use the current counter for this block and store counter + 1
          vmovdqu xmm9, OWORD PTR [r15]
          vpshufb xmm8, xmm9, OWORD PTR L_avx1_aes_gcm_bswap_epi64
          vpaddd xmm9, xmm9, OWORD PTR L_avx1_aes_gcm_one
          vmovdqu OWORD PTR [r15], xmm9
          vpxor xmm8, xmm8, [rax]
          vpclmulqdq xmm10, xmm1, xmm0, 16
          vaesenc xmm8, xmm8, [rax+16]
          vaesenc xmm8, xmm8, [rax+32]
          vpclmulqdq xmm11, xmm1, xmm0, 1
          vaesenc xmm8, xmm8, [rax+48]
          vaesenc xmm8, xmm8, [rax+64]
          vpclmulqdq xmm12, xmm1, xmm0, 0
          vaesenc xmm8, xmm8, [rax+80]
          vpclmulqdq xmm1, xmm1, xmm0, 17
          vaesenc xmm8, xmm8, [rax+96]
          vpxor xmm10, xmm10, xmm11
          vpslldq xmm2, xmm10, 8
          vpsrldq xmm10, xmm10, 8
          vaesenc xmm8, xmm8, [rax+112]
          vpxor xmm2, xmm2, xmm12
          vpxor xmm3, xmm1, xmm10
          ; Reduce using carry-less multiplies by the reduction constant
          vmovdqa xmm0, OWORD PTR L_avx1_aes_gcm_mod2_128
          vpclmulqdq xmm11, xmm2, xmm0, 16
          vaesenc xmm8, xmm8, [rax+128]
          vpshufd xmm10, xmm2, 78
          vpxor xmm10, xmm10, xmm11
          vpclmulqdq xmm11, xmm10, xmm0, 16
          vaesenc xmm8, xmm8, [rax+144]
          vpshufd xmm10, xmm10, 78
          vpxor xmm10, xmm10, xmm11
          vpxor xmm6, xmm10, xmm3
          cmp r8d, 11
          vmovdqa xmm9, OWORD PTR [rax+160]
          jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last
          vaesenc xmm8, xmm8, xmm9
          vaesenc xmm8, xmm8, [rax+176]
          cmp r8d, 13
          vmovdqa xmm9, OWORD PTR [rax+192]
          jl L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last
          vaesenc xmm8, xmm8, xmm9
          vaesenc xmm8, xmm8, [rax+208]
          vmovdqa xmm9, OWORD PTR [rax+224]
  L_AES_GCM_decrypt_update_avx1_aesenc_gfmul_last:
          vaesenclast xmm8, xmm8, xmm9
          vmovdqa xmm0, xmm13
          vpxor xmm8, xmm8, xmm0
          vmovdqu OWORD PTR [r10+rdi], xmm8
          add edi, 16
          cmp edi, r13d
          jl L_AES_GCM_decrypt_update_avx1_last_block_start
  L_AES_GCM_decrypt_update_avx1_last_block_done:
  L_AES_GCM_decrypt_update_avx1_done_dec:
          ; Write the updated GHASH value back for the caller
          vmovdqa OWORD PTR [r12], xmm6
          ; Restore the caller's non-volatile XMM registers
          vmovdqu xmm6, OWORD PTR [rsp+168]
          vmovdqu xmm7, OWORD PTR [rsp+184]
          vmovdqu xmm8, OWORD PTR [rsp+200]
          vmovdqu xmm9, OWORD PTR [rsp+216]
          vmovdqu xmm10, OWORD PTR [rsp+232]
          vmovdqu xmm11, OWORD PTR [rsp+248]
          vmovdqu xmm12, OWORD PTR [rsp+264]
          vmovdqu xmm13, OWORD PTR [rsp+280]
          vmovdqu xmm14, OWORD PTR [rsp+296]
          vmovdqu xmm15, OWORD PTR [rsp+312]
          vzeroupper
          add rsp, 328
          pop rdi
          pop r15
          pop r14
          pop r12
          pop r13
          ret
  AES_GCM_decrypt_update_avx1 ENDP
  _text ENDS
  _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_decrypt_final_avx1
  ;
  ; Computes the final GCM tag exactly as the encrypt-side finaliser does
  ; (fold bit-lengths into the GHASH value, multiply by the hash key,
  ; byte-swap, XOR with a 16-byte mask block) and compares it with the
  ; tag supplied by the caller. Writes 1 to *res on a match, 0 otherwise.
  ;
  ; ABI: Microsoft x64.
  ; In:  rcx        -> 16-byte aligned block read with vmovdqa;
  ;                    presumably the running GHASH state
  ;      rdx        -> tag bytes to verify
  ;      r8d        =  number of tag bytes to compare (16 takes the
  ;                    vector fast path; 0 is not guarded)
  ;      r9d        =  byte count placed in the low qword of the length
  ;                    block (multiplied by 8 here)
  ;      [rsp+40]   =  byte count placed in the high qword (caller view;
  ;                    read at [rsp+80] after the 5 pushes)
  ;      [rsp+48]   -> hash key H, 16-byte aligned
  ;      [rsp+56]   -> 16-byte aligned block XORed into the computed
  ;                    tag; presumably the encrypted initial counter
  ;      [rsp+64]   -> 32-bit result, written as 0 or 1
  ; Out: *res updated; no value returned in rax.
  ; Clobbers: rax, rcx, rdx, r9-r11, xmm0, xmm1, xmm5, flags.
  ;
  ; The byte-wise comparison ORs together the XOR of every byte pair and
  ; has no early exit, so its running time does not depend on where the
  ; first mismatch occurs.
  ;
  ; xmm6 and xmm8-xmm15 are non-volatile under the Microsoft x64
  ; convention, so they are spilled to the frame on entry and reloaded
  ; before returning.
  ;
  ; Frame (160 bytes):
  ;      [rsp+0   .. rsp+15 ]  unused (kept from the original layout)
  ;      [rsp+16  .. rsp+159]  saved xmm6, xmm8..xmm15
  ; The partial-tag path temporarily allocates a further 16 bytes below
  ; this frame for the computed tag.
  ;-----------------------------------------------------------------------
  AES_GCM_decrypt_final_avx1 PROC
          push r13
          push r12
          push r14
          push rbp
          push r15
          mov rax, rcx
          mov r10d, r9d
          mov r9, rdx
          ; Stack arguments: 5 pushes (40) + return address (8) + shadow (32) = 80
          mov r11d, DWORD PTR [rsp+80]
          mov r12, QWORD PTR [rsp+88]
          mov r14, QWORD PTR [rsp+96]
          mov rbp, QWORD PTR [rsp+104]
          sub rsp, 160
          ; Preserve the non-volatile XMM registers this routine overwrites
          vmovdqu OWORD PTR [rsp+16], xmm6
          vmovdqu OWORD PTR [rsp+32], xmm8
          vmovdqu OWORD PTR [rsp+48], xmm9
          vmovdqu OWORD PTR [rsp+64], xmm10
          vmovdqu OWORD PTR [rsp+80], xmm11
          vmovdqu OWORD PTR [rsp+96], xmm12
          vmovdqu OWORD PTR [rsp+112], xmm13
          vmovdqu OWORD PTR [rsp+128], xmm14
          vmovdqu OWORD PTR [rsp+144], xmm15
          vmovdqa xmm6, OWORD PTR [rax]
          vmovdqa xmm5, OWORD PTR [r12]
          vmovdqa xmm15, OWORD PTR [r14]
          ; xmm5 = (H << 1), conditionally XORed with the reduction constant
          ; when the top bit of H was set
          vpsrlq xmm9, xmm5, 63
          vpsllq xmm8, xmm5, 1
          vpslldq xmm9, xmm9, 8
          vpor xmm8, xmm8, xmm9
          vpshufd xmm5, xmm5, 255
          vpsrad xmm5, xmm5, 31
          vpand xmm5, xmm5, OWORD PTR L_avx1_aes_gcm_mod2_128
          vpxor xmm5, xmm5, xmm8
          ; Build the length block: both byte counts converted to bit counts
          mov edx, r10d
          mov ecx, r11d
          shl rdx, 3
          shl rcx, 3
          vmovq xmm0, rdx
          vmovq xmm1, rcx
          vpunpcklqdq xmm0, xmm0, xmm1
          vpxor xmm6, xmm6, xmm0
          ; ghash_gfmul_red_avx
          vpshufd xmm9, xmm5, 78
          vpshufd xmm10, xmm6, 78
          vpclmulqdq xmm11, xmm6, xmm5, 17
          vpclmulqdq xmm8, xmm6, xmm5, 0
          vpxor xmm9, xmm9, xmm5
          vpxor xmm10, xmm10, xmm6
          vpclmulqdq xmm9, xmm9, xmm10, 0
          vpxor xmm9, xmm9, xmm8
          vpxor xmm9, xmm9, xmm11
          vpslldq xmm10, xmm9, 8
          vpsrldq xmm9, xmm9, 8
          vpxor xmm8, xmm8, xmm10
          vpxor xmm6, xmm11, xmm9
          vpslld xmm12, xmm8, 31
          vpslld xmm13, xmm8, 30
          vpslld xmm14, xmm8, 25
          vpxor xmm12, xmm12, xmm13
          vpxor xmm12, xmm12, xmm14
          vpsrldq xmm13, xmm12, 4
          vpslldq xmm12, xmm12, 12
          vpxor xmm8, xmm8, xmm12
          vpsrld xmm14, xmm8, 1
          vpsrld xmm10, xmm8, 2
          vpsrld xmm9, xmm8, 7
          vpxor xmm14, xmm14, xmm10
          vpxor xmm14, xmm14, xmm9
          vpxor xmm14, xmm14, xmm13
          vpxor xmm14, xmm14, xmm8
          vpxor xmm6, xmm6, xmm14
          vpshufb xmm6, xmm6, OWORD PTR L_avx1_aes_gcm_bswap_mask
          ; xmm0 = computed 16-byte tag
          vpxor xmm0, xmm6, xmm15
          cmp r8d, 16
          je L_AES_GCM_decrypt_final_avx1_cmp_tag_16
          ; Partial tag: compare r8d bytes, accumulating differences in r15b
          sub rsp, 16
          xor rcx, rcx
          xor r15, r15
          vmovdqu OWORD PTR [rsp], xmm0
  L_AES_GCM_decrypt_final_avx1_cmp_tag_loop:
          movzx r13d, BYTE PTR [rsp+rcx]
          xor r13b, BYTE PTR [r9+rcx]
          or r15b, r13b
          inc ecx
          cmp ecx, r8d
          jne L_AES_GCM_decrypt_final_avx1_cmp_tag_loop
          ; r15 = 1 when no byte differed, else 0
          cmp r15, 0
          sete r15b
          add rsp, 16
          xor rcx, rcx
          jmp L_AES_GCM_decrypt_final_avx1_cmp_tag_done
  L_AES_GCM_decrypt_final_avx1_cmp_tag_16:
          vmovdqu xmm1, OWORD PTR [r9]
          vpcmpeqb xmm0, xmm0, xmm1
          vpmovmskb rdx, xmm0
          ; %%edx == 0xFFFF then return 1 else => return 0
          xor r15d, r15d
          cmp edx, 65535
          sete r15b
  L_AES_GCM_decrypt_final_avx1_cmp_tag_done:
          mov DWORD PTR [rbp], r15d
          ; Restore the caller's non-volatile XMM registers
          vmovdqu xmm6, OWORD PTR [rsp+16]
          vmovdqu xmm8, OWORD PTR [rsp+32]
          vmovdqu xmm9, OWORD PTR [rsp+48]
          vmovdqu xmm10, OWORD PTR [rsp+64]
          vmovdqu xmm11, OWORD PTR [rsp+80]
          vmovdqu xmm12, OWORD PTR [rsp+96]
          vmovdqu xmm13, OWORD PTR [rsp+112]
          vmovdqu xmm14, OWORD PTR [rsp+128]
          vmovdqu xmm15, OWORD PTR [rsp+144]
          vzeroupper
          add rsp, 160
          pop r15
          pop rbp
          pop r14
          pop r12
          pop r13
          ret
  AES_GCM_decrypt_final_avx1 ENDP
  _text ENDS
  11164. ENDIF
  11165. IFDEF HAVE_INTEL_AVX2
  ;-----------------------------------------------------------------------
  ; Constant pool for the AVX2 AES-GCM routines.
  ;
  ; Every entry is a 16-byte-aligned pair of qwords (low qword first)
  ; followed by a qword holding the address of that pair. All entries
  ; live in the _DATA segment, in the order listed.
  ;-----------------------------------------------------------------------
  _DATA SEGMENT

  ; Pairs with a zero low qword and a small integer 1..8 in the high qword
  ALIGN 16
  L_avx2_aes_gcm_one QWORD 0, 1
  ptr_L_avx2_aes_gcm_one QWORD L_avx2_aes_gcm_one

  ALIGN 16
  L_avx2_aes_gcm_two QWORD 0, 2
  ptr_L_avx2_aes_gcm_two QWORD L_avx2_aes_gcm_two

  ALIGN 16
  L_avx2_aes_gcm_three QWORD 0, 3
  ptr_L_avx2_aes_gcm_three QWORD L_avx2_aes_gcm_three

  ALIGN 16
  L_avx2_aes_gcm_four QWORD 0, 4
  ptr_L_avx2_aes_gcm_four QWORD L_avx2_aes_gcm_four

  ALIGN 16
  L_avx2_aes_gcm_five QWORD 0, 5
  ptr_L_avx2_aes_gcm_five QWORD L_avx2_aes_gcm_five

  ALIGN 16
  L_avx2_aes_gcm_six QWORD 0, 6
  ptr_L_avx2_aes_gcm_six QWORD L_avx2_aes_gcm_six

  ALIGN 16
  L_avx2_aes_gcm_seven QWORD 0, 7
  ptr_L_avx2_aes_gcm_seven QWORD L_avx2_aes_gcm_seven

  ALIGN 16
  L_avx2_aes_gcm_eight QWORD 0, 8
  ptr_L_avx2_aes_gcm_eight QWORD L_avx2_aes_gcm_eight

  ; High qword has only its most significant byte set (value 1 << 56)
  ALIGN 16
  L_avx2_aes_gcm_bswap_one QWORD 0, 0100000000000000h
  ptr_L_avx2_aes_gcm_bswap_one QWORD L_avx2_aes_gcm_bswap_one

  ; Byte-index table 07 06 .. 00 | 0F 0E .. 08 in memory order
  ALIGN 16
  L_avx2_aes_gcm_bswap_epi64 QWORD 0001020304050607h, 08090A0B0C0D0E0Fh
  ptr_L_avx2_aes_gcm_bswap_epi64 QWORD L_avx2_aes_gcm_bswap_epi64

  ; Byte-index table 0F 0E .. 00 in memory order
  ALIGN 16
  L_avx2_aes_gcm_bswap_mask QWORD 08090A0B0C0D0E0Fh, 0001020304050607h
  ptr_L_avx2_aes_gcm_bswap_mask QWORD L_avx2_aes_gcm_bswap_mask

  ; Low qword 1, high qword with top byte 0C2h
  ALIGN 16
  L_avx2_aes_gcm_mod2_128 QWORD 1, 0C200000000000000h
  ptr_L_avx2_aes_gcm_mod2_128 QWORD L_avx2_aes_gcm_mod2_128

  _DATA ENDS
  11226. _text SEGMENT READONLY PARA
  11227. AES_GCM_encrypt_avx2 PROC
  11228. push r13
  11229. push rdi
  11230. push r12
  11231. push r15
  11232. push rbx
  11233. push r14
  11234. push rsi
  11235. mov rdi, rcx
  11236. mov r12, r8
  11237. mov rax, r9
  11238. mov r15, QWORD PTR [rsp+96]
  11239. mov r8, rdx
  11240. mov r10d, DWORD PTR [rsp+104]
  11241. mov r11d, DWORD PTR [rsp+112]
  11242. mov ebx, DWORD PTR [rsp+120]
  11243. mov r14d, DWORD PTR [rsp+128]
  11244. mov rsi, QWORD PTR [rsp+136]
  11245. mov r9d, DWORD PTR [rsp+144]
  11246. sub rsp, 160
  11247. vpxor xmm4, xmm4, xmm4
  11248. vpxor xmm6, xmm6, xmm6
  11249. mov edx, ebx
  11250. cmp edx, 12
  11251. je L_AES_GCM_encrypt_avx2_iv_12
  11252. ; Calculate values when IV is not 12 bytes
  11253. ; H = Encrypt X(=0)
  11254. vmovdqu xmm5, OWORD PTR [rsi]
  11255. vaesenc xmm5, xmm5, [rsi+16]
  11256. vaesenc xmm5, xmm5, [rsi+32]
  11257. vaesenc xmm5, xmm5, [rsi+48]
  11258. vaesenc xmm5, xmm5, [rsi+64]
  11259. vaesenc xmm5, xmm5, [rsi+80]
  11260. vaesenc xmm5, xmm5, [rsi+96]
  11261. vaesenc xmm5, xmm5, [rsi+112]
  11262. vaesenc xmm5, xmm5, [rsi+128]
  11263. vaesenc xmm5, xmm5, [rsi+144]
  11264. cmp r9d, 11
  11265. vmovdqu xmm0, OWORD PTR [rsi+160]
  11266. jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
  11267. vaesenc xmm5, xmm5, xmm0
  11268. vaesenc xmm5, xmm5, [rsi+176]
  11269. cmp r9d, 13
  11270. vmovdqu xmm0, OWORD PTR [rsi+192]
  11271. jl L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last
  11272. vaesenc xmm5, xmm5, xmm0
  11273. vaesenc xmm5, xmm5, [rsi+208]
  11274. vmovdqu xmm0, OWORD PTR [rsi+224]
  11275. L_AES_GCM_encrypt_avx2_calc_iv_1_aesenc_avx_last:
  11276. vaesenclast xmm5, xmm5, xmm0
  11277. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11278. ; Calc counter
  11279. ; Initialization vector
  11280. cmp edx, 0
  11281. mov rcx, 0
  11282. je L_AES_GCM_encrypt_avx2_calc_iv_done
  11283. cmp edx, 16
  11284. jl L_AES_GCM_encrypt_avx2_calc_iv_lt16
  11285. and edx, 4294967280
  11286. L_AES_GCM_encrypt_avx2_calc_iv_16_loop:
  11287. vmovdqu xmm0, OWORD PTR [rax+rcx]
  11288. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11289. vpxor xmm4, xmm4, xmm0
  11290. ; ghash_gfmul_avx
  11291. vpclmulqdq xmm2, xmm5, xmm4, 16
  11292. vpclmulqdq xmm1, xmm5, xmm4, 1
  11293. vpclmulqdq xmm0, xmm5, xmm4, 0
  11294. vpclmulqdq xmm3, xmm5, xmm4, 17
  11295. vpxor xmm2, xmm2, xmm1
  11296. vpslldq xmm1, xmm2, 8
  11297. vpsrldq xmm2, xmm2, 8
  11298. vpxor xmm7, xmm0, xmm1
  11299. vpxor xmm4, xmm3, xmm2
  11300. ; ghash_mid
  11301. vpsrld xmm0, xmm7, 31
  11302. vpsrld xmm1, xmm4, 31
  11303. vpslld xmm7, xmm7, 1
  11304. vpslld xmm4, xmm4, 1
  11305. vpsrldq xmm2, xmm0, 12
  11306. vpslldq xmm0, xmm0, 4
  11307. vpslldq xmm1, xmm1, 4
  11308. vpor xmm4, xmm4, xmm2
  11309. vpor xmm7, xmm7, xmm0
  11310. vpor xmm4, xmm4, xmm1
  11311. ; ghash_red
  11312. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11313. vpclmulqdq xmm0, xmm7, xmm2, 16
  11314. vpshufd xmm1, xmm7, 78
  11315. vpxor xmm1, xmm1, xmm0
  11316. vpclmulqdq xmm0, xmm1, xmm2, 16
  11317. vpshufd xmm1, xmm1, 78
  11318. vpxor xmm1, xmm1, xmm0
  11319. vpxor xmm4, xmm4, xmm1
  11320. add ecx, 16
  11321. cmp ecx, edx
  11322. jl L_AES_GCM_encrypt_avx2_calc_iv_16_loop
  11323. mov edx, ebx
  11324. cmp ecx, edx
  11325. je L_AES_GCM_encrypt_avx2_calc_iv_done
  11326. L_AES_GCM_encrypt_avx2_calc_iv_lt16:
  11327. vpxor xmm0, xmm0, xmm0
  11328. xor ebx, ebx
  11329. vmovdqu OWORD PTR [rsp], xmm0
  11330. L_AES_GCM_encrypt_avx2_calc_iv_loop:
  11331. movzx r13d, BYTE PTR [rax+rcx]
  11332. mov BYTE PTR [rsp+rbx], r13b
  11333. inc ecx
  11334. inc ebx
  11335. cmp ecx, edx
  11336. jl L_AES_GCM_encrypt_avx2_calc_iv_loop
  11337. vmovdqu xmm0, OWORD PTR [rsp]
  11338. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11339. vpxor xmm4, xmm4, xmm0
  11340. ; ghash_gfmul_avx
  11341. vpclmulqdq xmm2, xmm5, xmm4, 16
  11342. vpclmulqdq xmm1, xmm5, xmm4, 1
  11343. vpclmulqdq xmm0, xmm5, xmm4, 0
  11344. vpclmulqdq xmm3, xmm5, xmm4, 17
  11345. vpxor xmm2, xmm2, xmm1
  11346. vpslldq xmm1, xmm2, 8
  11347. vpsrldq xmm2, xmm2, 8
  11348. vpxor xmm7, xmm0, xmm1
  11349. vpxor xmm4, xmm3, xmm2
  11350. ; ghash_mid
  11351. vpsrld xmm0, xmm7, 31
  11352. vpsrld xmm1, xmm4, 31
  11353. vpslld xmm7, xmm7, 1
  11354. vpslld xmm4, xmm4, 1
  11355. vpsrldq xmm2, xmm0, 12
  11356. vpslldq xmm0, xmm0, 4
  11357. vpslldq xmm1, xmm1, 4
  11358. vpor xmm4, xmm4, xmm2
  11359. vpor xmm7, xmm7, xmm0
  11360. vpor xmm4, xmm4, xmm1
  11361. ; ghash_red
  11362. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11363. vpclmulqdq xmm0, xmm7, xmm2, 16
  11364. vpshufd xmm1, xmm7, 78
  11365. vpxor xmm1, xmm1, xmm0
  11366. vpclmulqdq xmm0, xmm1, xmm2, 16
  11367. vpshufd xmm1, xmm1, 78
  11368. vpxor xmm1, xmm1, xmm0
  11369. vpxor xmm4, xmm4, xmm1
  11370. L_AES_GCM_encrypt_avx2_calc_iv_done:
  11371. ; T = Encrypt counter
  11372. vpxor xmm0, xmm0, xmm0
  11373. shl edx, 3
  11374. vmovq xmm0, rdx
  11375. vpxor xmm4, xmm4, xmm0
  11376. ; ghash_gfmul_avx
  11377. vpclmulqdq xmm2, xmm5, xmm4, 16
  11378. vpclmulqdq xmm1, xmm5, xmm4, 1
  11379. vpclmulqdq xmm0, xmm5, xmm4, 0
  11380. vpclmulqdq xmm3, xmm5, xmm4, 17
  11381. vpxor xmm2, xmm2, xmm1
  11382. vpslldq xmm1, xmm2, 8
  11383. vpsrldq xmm2, xmm2, 8
  11384. vpxor xmm7, xmm0, xmm1
  11385. vpxor xmm4, xmm3, xmm2
  11386. ; ghash_mid
  11387. vpsrld xmm0, xmm7, 31
  11388. vpsrld xmm1, xmm4, 31
  11389. vpslld xmm7, xmm7, 1
  11390. vpslld xmm4, xmm4, 1
  11391. vpsrldq xmm2, xmm0, 12
  11392. vpslldq xmm0, xmm0, 4
  11393. vpslldq xmm1, xmm1, 4
  11394. vpor xmm4, xmm4, xmm2
  11395. vpor xmm7, xmm7, xmm0
  11396. vpor xmm4, xmm4, xmm1
  11397. ; ghash_red
  11398. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11399. vpclmulqdq xmm0, xmm7, xmm2, 16
  11400. vpshufd xmm1, xmm7, 78
  11401. vpxor xmm1, xmm1, xmm0
  11402. vpclmulqdq xmm0, xmm1, xmm2, 16
  11403. vpshufd xmm1, xmm1, 78
  11404. vpxor xmm1, xmm1, xmm0
  11405. vpxor xmm4, xmm4, xmm1
  11406. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11407. ; Encrypt counter
  11408. vmovdqu xmm15, OWORD PTR [rsi]
  11409. vpxor xmm15, xmm15, xmm4
  11410. vaesenc xmm15, xmm15, [rsi+16]
  11411. vaesenc xmm15, xmm15, [rsi+32]
  11412. vaesenc xmm15, xmm15, [rsi+48]
  11413. vaesenc xmm15, xmm15, [rsi+64]
  11414. vaesenc xmm15, xmm15, [rsi+80]
  11415. vaesenc xmm15, xmm15, [rsi+96]
  11416. vaesenc xmm15, xmm15, [rsi+112]
  11417. vaesenc xmm15, xmm15, [rsi+128]
  11418. vaesenc xmm15, xmm15, [rsi+144]
  11419. cmp r9d, 11
  11420. vmovdqu xmm0, OWORD PTR [rsi+160]
  11421. jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
  11422. vaesenc xmm15, xmm15, xmm0
  11423. vaesenc xmm15, xmm15, [rsi+176]
  11424. cmp r9d, 13
  11425. vmovdqu xmm0, OWORD PTR [rsi+192]
  11426. jl L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last
  11427. vaesenc xmm15, xmm15, xmm0
  11428. vaesenc xmm15, xmm15, [rsi+208]
  11429. vmovdqu xmm0, OWORD PTR [rsi+224]
  11430. L_AES_GCM_encrypt_avx2_calc_iv_2_aesenc_avx_last:
  11431. vaesenclast xmm15, xmm15, xmm0
  11432. jmp L_AES_GCM_encrypt_avx2_iv_done
  11433. L_AES_GCM_encrypt_avx2_iv_12:
  11434. ; Calculate values when IV is 12 bytes
  11435. ; Set counter based on IV
  11436. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
  11437. vmovdqu xmm5, OWORD PTR [rsi]
  11438. vpblendd xmm4, xmm4, [rax], 7
  11439. ; H = Encrypt X(=0) and T = Encrypt counter
  11440. vmovdqu xmm7, OWORD PTR [rsi+16]
  11441. vpxor xmm15, xmm4, xmm5
  11442. vaesenc xmm5, xmm5, xmm7
  11443. vaesenc xmm15, xmm15, xmm7
  11444. vmovdqu xmm0, OWORD PTR [rsi+32]
  11445. vaesenc xmm5, xmm5, xmm0
  11446. vaesenc xmm15, xmm15, xmm0
  11447. vmovdqu xmm0, OWORD PTR [rsi+48]
  11448. vaesenc xmm5, xmm5, xmm0
  11449. vaesenc xmm15, xmm15, xmm0
  11450. vmovdqu xmm0, OWORD PTR [rsi+64]
  11451. vaesenc xmm5, xmm5, xmm0
  11452. vaesenc xmm15, xmm15, xmm0
  11453. vmovdqu xmm0, OWORD PTR [rsi+80]
  11454. vaesenc xmm5, xmm5, xmm0
  11455. vaesenc xmm15, xmm15, xmm0
  11456. vmovdqu xmm0, OWORD PTR [rsi+96]
  11457. vaesenc xmm5, xmm5, xmm0
  11458. vaesenc xmm15, xmm15, xmm0
  11459. vmovdqu xmm0, OWORD PTR [rsi+112]
  11460. vaesenc xmm5, xmm5, xmm0
  11461. vaesenc xmm15, xmm15, xmm0
  11462. vmovdqu xmm0, OWORD PTR [rsi+128]
  11463. vaesenc xmm5, xmm5, xmm0
  11464. vaesenc xmm15, xmm15, xmm0
  11465. vmovdqu xmm0, OWORD PTR [rsi+144]
  11466. vaesenc xmm5, xmm5, xmm0
  11467. vaesenc xmm15, xmm15, xmm0
  11468. cmp r9d, 11
  11469. vmovdqu xmm0, OWORD PTR [rsi+160]
  11470. jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
  11471. vaesenc xmm5, xmm5, xmm0
  11472. vaesenc xmm15, xmm15, xmm0
  11473. vmovdqu xmm0, OWORD PTR [rsi+176]
  11474. vaesenc xmm5, xmm5, xmm0
  11475. vaesenc xmm15, xmm15, xmm0
  11476. cmp r9d, 13
  11477. vmovdqu xmm0, OWORD PTR [rsi+192]
  11478. jl L_AES_GCM_encrypt_avx2_calc_iv_12_last
  11479. vaesenc xmm5, xmm5, xmm0
  11480. vaesenc xmm15, xmm15, xmm0
  11481. vmovdqu xmm0, OWORD PTR [rsi+208]
  11482. vaesenc xmm5, xmm5, xmm0
  11483. vaesenc xmm15, xmm15, xmm0
  11484. vmovdqu xmm0, OWORD PTR [rsi+224]
  11485. L_AES_GCM_encrypt_avx2_calc_iv_12_last:
  11486. vaesenclast xmm5, xmm5, xmm0
  11487. vaesenclast xmm15, xmm15, xmm0
  11488. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11489. L_AES_GCM_encrypt_avx2_iv_done:
  11490. ; Additional authentication data
  11491. mov edx, r11d
  11492. cmp edx, 0
  11493. je L_AES_GCM_encrypt_avx2_calc_aad_done
  11494. xor ecx, ecx
  11495. cmp edx, 16
  11496. jl L_AES_GCM_encrypt_avx2_calc_aad_lt16
  11497. and edx, 4294967280
  11498. L_AES_GCM_encrypt_avx2_calc_aad_16_loop:
  11499. vmovdqu xmm0, OWORD PTR [r12+rcx]
  11500. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11501. vpxor xmm6, xmm6, xmm0
  11502. ; ghash_gfmul_avx
  11503. vpclmulqdq xmm2, xmm5, xmm6, 16
  11504. vpclmulqdq xmm1, xmm5, xmm6, 1
  11505. vpclmulqdq xmm0, xmm5, xmm6, 0
  11506. vpclmulqdq xmm3, xmm5, xmm6, 17
  11507. vpxor xmm2, xmm2, xmm1
  11508. vpslldq xmm1, xmm2, 8
  11509. vpsrldq xmm2, xmm2, 8
  11510. vpxor xmm7, xmm0, xmm1
  11511. vpxor xmm6, xmm3, xmm2
  11512. ; ghash_mid
  11513. vpsrld xmm0, xmm7, 31
  11514. vpsrld xmm1, xmm6, 31
  11515. vpslld xmm7, xmm7, 1
  11516. vpslld xmm6, xmm6, 1
  11517. vpsrldq xmm2, xmm0, 12
  11518. vpslldq xmm0, xmm0, 4
  11519. vpslldq xmm1, xmm1, 4
  11520. vpor xmm6, xmm6, xmm2
  11521. vpor xmm7, xmm7, xmm0
  11522. vpor xmm6, xmm6, xmm1
  11523. ; ghash_red
  11524. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11525. vpclmulqdq xmm0, xmm7, xmm2, 16
  11526. vpshufd xmm1, xmm7, 78
  11527. vpxor xmm1, xmm1, xmm0
  11528. vpclmulqdq xmm0, xmm1, xmm2, 16
  11529. vpshufd xmm1, xmm1, 78
  11530. vpxor xmm1, xmm1, xmm0
  11531. vpxor xmm6, xmm6, xmm1
  11532. add ecx, 16
  11533. cmp ecx, edx
  11534. jl L_AES_GCM_encrypt_avx2_calc_aad_16_loop
  11535. mov edx, r11d
  11536. cmp ecx, edx
  11537. je L_AES_GCM_encrypt_avx2_calc_aad_done
  11538. L_AES_GCM_encrypt_avx2_calc_aad_lt16:
  11539. vpxor xmm0, xmm0, xmm0
  11540. xor ebx, ebx
  11541. vmovdqu OWORD PTR [rsp], xmm0
  11542. L_AES_GCM_encrypt_avx2_calc_aad_loop:
  11543. movzx r13d, BYTE PTR [r12+rcx]
  11544. mov BYTE PTR [rsp+rbx], r13b
  11545. inc ecx
  11546. inc ebx
  11547. cmp ecx, edx
  11548. jl L_AES_GCM_encrypt_avx2_calc_aad_loop
  11549. vmovdqu xmm0, OWORD PTR [rsp]
  11550. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11551. vpxor xmm6, xmm6, xmm0
  11552. ; ghash_gfmul_avx
  11553. vpclmulqdq xmm2, xmm5, xmm6, 16
  11554. vpclmulqdq xmm1, xmm5, xmm6, 1
  11555. vpclmulqdq xmm0, xmm5, xmm6, 0
  11556. vpclmulqdq xmm3, xmm5, xmm6, 17
  11557. vpxor xmm2, xmm2, xmm1
  11558. vpslldq xmm1, xmm2, 8
  11559. vpsrldq xmm2, xmm2, 8
  11560. vpxor xmm7, xmm0, xmm1
  11561. vpxor xmm6, xmm3, xmm2
  11562. ; ghash_mid
  11563. vpsrld xmm0, xmm7, 31
  11564. vpsrld xmm1, xmm6, 31
  11565. vpslld xmm7, xmm7, 1
  11566. vpslld xmm6, xmm6, 1
  11567. vpsrldq xmm2, xmm0, 12
  11568. vpslldq xmm0, xmm0, 4
  11569. vpslldq xmm1, xmm1, 4
  11570. vpor xmm6, xmm6, xmm2
  11571. vpor xmm7, xmm7, xmm0
  11572. vpor xmm6, xmm6, xmm1
  11573. ; ghash_red
  11574. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  11575. vpclmulqdq xmm0, xmm7, xmm2, 16
  11576. vpshufd xmm1, xmm7, 78
  11577. vpxor xmm1, xmm1, xmm0
  11578. vpclmulqdq xmm0, xmm1, xmm2, 16
  11579. vpshufd xmm1, xmm1, 78
  11580. vpxor xmm1, xmm1, xmm0
  11581. vpxor xmm6, xmm6, xmm1
  11582. L_AES_GCM_encrypt_avx2_calc_aad_done:
  11583. ; Calculate counter and H
  11584. vpsrlq xmm1, xmm5, 63
  11585. vpsllq xmm0, xmm5, 1
  11586. vpslldq xmm1, xmm1, 8
  11587. vpor xmm0, xmm0, xmm1
  11588. vpshufd xmm5, xmm5, 255
  11589. vpsrad xmm5, xmm5, 31
  11590. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  11591. vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
  11592. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  11593. vpxor xmm5, xmm5, xmm0
  11594. xor ebx, ebx
  11595. cmp r10d, 128
  11596. mov r13d, r10d
  11597. jl L_AES_GCM_encrypt_avx2_done_128
  11598. and r13d, 4294967168
  11599. vmovdqu OWORD PTR [rsp+128], xmm4
  11600. vmovdqu OWORD PTR [rsp+144], xmm15
  11601. vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
  11602. ; H ^ 1 and H ^ 2
  11603. vpclmulqdq xmm9, xmm5, xmm5, 0
  11604. vpclmulqdq xmm10, xmm5, xmm5, 17
  11605. vpclmulqdq xmm8, xmm9, xmm3, 16
  11606. vpshufd xmm9, xmm9, 78
  11607. vpxor xmm9, xmm9, xmm8
  11608. vpclmulqdq xmm8, xmm9, xmm3, 16
  11609. vpshufd xmm9, xmm9, 78
  11610. vpxor xmm9, xmm9, xmm8
  11611. vpxor xmm0, xmm10, xmm9
  11612. vmovdqu OWORD PTR [rsp], xmm5
  11613. vmovdqu OWORD PTR [rsp+16], xmm0
  11614. ; H ^ 3 and H ^ 4
  11615. vpclmulqdq xmm11, xmm0, xmm5, 16
  11616. vpclmulqdq xmm10, xmm0, xmm5, 1
  11617. vpclmulqdq xmm9, xmm0, xmm5, 0
  11618. vpclmulqdq xmm12, xmm0, xmm5, 17
  11619. vpclmulqdq xmm13, xmm0, xmm0, 0
  11620. vpclmulqdq xmm14, xmm0, xmm0, 17
  11621. vpxor xmm11, xmm11, xmm10
  11622. vpslldq xmm10, xmm11, 8
  11623. vpsrldq xmm11, xmm11, 8
  11624. vpxor xmm10, xmm10, xmm9
  11625. vpclmulqdq xmm8, xmm13, xmm3, 16
  11626. vpclmulqdq xmm9, xmm10, xmm3, 16
  11627. vpshufd xmm10, xmm10, 78
  11628. vpshufd xmm13, xmm13, 78
  11629. vpxor xmm10, xmm10, xmm9
  11630. vpxor xmm13, xmm13, xmm8
  11631. vpclmulqdq xmm9, xmm10, xmm3, 16
  11632. vpclmulqdq xmm8, xmm13, xmm3, 16
  11633. vpshufd xmm10, xmm10, 78
  11634. vpshufd xmm13, xmm13, 78
  11635. vpxor xmm12, xmm12, xmm11
  11636. vpxor xmm13, xmm13, xmm8
  11637. vpxor xmm10, xmm10, xmm12
  11638. vpxor xmm2, xmm13, xmm14
  11639. vpxor xmm1, xmm10, xmm9
  11640. vmovdqu OWORD PTR [rsp+32], xmm1
  11641. vmovdqu OWORD PTR [rsp+48], xmm2
  11642. ; H ^ 5 and H ^ 6
  11643. vpclmulqdq xmm11, xmm1, xmm0, 16
  11644. vpclmulqdq xmm10, xmm1, xmm0, 1
  11645. vpclmulqdq xmm9, xmm1, xmm0, 0
  11646. vpclmulqdq xmm12, xmm1, xmm0, 17
  11647. vpclmulqdq xmm13, xmm1, xmm1, 0
  11648. vpclmulqdq xmm14, xmm1, xmm1, 17
  11649. vpxor xmm11, xmm11, xmm10
  11650. vpslldq xmm10, xmm11, 8
  11651. vpsrldq xmm11, xmm11, 8
  11652. vpxor xmm10, xmm10, xmm9
  11653. vpclmulqdq xmm8, xmm13, xmm3, 16
  11654. vpclmulqdq xmm9, xmm10, xmm3, 16
  11655. vpshufd xmm10, xmm10, 78
  11656. vpshufd xmm13, xmm13, 78
  11657. vpxor xmm10, xmm10, xmm9
  11658. vpxor xmm13, xmm13, xmm8
  11659. vpclmulqdq xmm9, xmm10, xmm3, 16
  11660. vpclmulqdq xmm8, xmm13, xmm3, 16
  11661. vpshufd xmm10, xmm10, 78
  11662. vpshufd xmm13, xmm13, 78
  11663. vpxor xmm12, xmm12, xmm11
  11664. vpxor xmm13, xmm13, xmm8
  11665. vpxor xmm10, xmm10, xmm12
  11666. vpxor xmm0, xmm13, xmm14
  11667. vpxor xmm7, xmm10, xmm9
  11668. vmovdqu OWORD PTR [rsp+64], xmm7
  11669. vmovdqu OWORD PTR [rsp+80], xmm0
  11670. ; H ^ 7 and H ^ 8
  11671. vpclmulqdq xmm11, xmm2, xmm1, 16
  11672. vpclmulqdq xmm10, xmm2, xmm1, 1
  11673. vpclmulqdq xmm9, xmm2, xmm1, 0
  11674. vpclmulqdq xmm12, xmm2, xmm1, 17
  11675. vpclmulqdq xmm13, xmm2, xmm2, 0
  11676. vpclmulqdq xmm14, xmm2, xmm2, 17
  11677. vpxor xmm11, xmm11, xmm10
  11678. vpslldq xmm10, xmm11, 8
  11679. vpsrldq xmm11, xmm11, 8
  11680. vpxor xmm10, xmm10, xmm9
  11681. vpclmulqdq xmm8, xmm13, xmm3, 16
  11682. vpclmulqdq xmm9, xmm10, xmm3, 16
  11683. vpshufd xmm10, xmm10, 78
  11684. vpshufd xmm13, xmm13, 78
  11685. vpxor xmm10, xmm10, xmm9
  11686. vpxor xmm13, xmm13, xmm8
  11687. vpclmulqdq xmm9, xmm10, xmm3, 16
  11688. vpclmulqdq xmm8, xmm13, xmm3, 16
  11689. vpshufd xmm10, xmm10, 78
  11690. vpshufd xmm13, xmm13, 78
  11691. vpxor xmm12, xmm12, xmm11
  11692. vpxor xmm13, xmm13, xmm8
  11693. vpxor xmm10, xmm10, xmm12
  11694. vpxor xmm0, xmm13, xmm14
  11695. vpxor xmm7, xmm10, xmm9
  11696. vmovdqu OWORD PTR [rsp+96], xmm7
  11697. vmovdqu OWORD PTR [rsp+112], xmm0
  11698. ; First 128 bytes of input
  11699. ; aesenc_128
  11700. ; aesenc_ctr
  11701. vmovdqu xmm0, OWORD PTR [rsp+128]
  11702. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  11703. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  11704. vpshufb xmm8, xmm0, xmm1
  11705. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  11706. vpshufb xmm9, xmm9, xmm1
  11707. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  11708. vpshufb xmm10, xmm10, xmm1
  11709. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  11710. vpshufb xmm11, xmm11, xmm1
  11711. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  11712. vpshufb xmm12, xmm12, xmm1
  11713. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  11714. vpshufb xmm13, xmm13, xmm1
  11715. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  11716. vpshufb xmm14, xmm14, xmm1
  11717. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  11718. vpshufb xmm15, xmm15, xmm1
  11719. ; aesenc_xor
  11720. vmovdqu xmm7, OWORD PTR [rsi]
  11721. vmovdqu OWORD PTR [rsp+128], xmm0
  11722. vpxor xmm8, xmm8, xmm7
  11723. vpxor xmm9, xmm9, xmm7
  11724. vpxor xmm10, xmm10, xmm7
  11725. vpxor xmm11, xmm11, xmm7
  11726. vpxor xmm12, xmm12, xmm7
  11727. vpxor xmm13, xmm13, xmm7
  11728. vpxor xmm14, xmm14, xmm7
  11729. vpxor xmm15, xmm15, xmm7
  11730. vmovdqu xmm7, OWORD PTR [rsi+16]
  11731. vaesenc xmm8, xmm8, xmm7
  11732. vaesenc xmm9, xmm9, xmm7
  11733. vaesenc xmm10, xmm10, xmm7
  11734. vaesenc xmm11, xmm11, xmm7
  11735. vaesenc xmm12, xmm12, xmm7
  11736. vaesenc xmm13, xmm13, xmm7
  11737. vaesenc xmm14, xmm14, xmm7
  11738. vaesenc xmm15, xmm15, xmm7
  11739. vmovdqu xmm7, OWORD PTR [rsi+32]
  11740. vaesenc xmm8, xmm8, xmm7
  11741. vaesenc xmm9, xmm9, xmm7
  11742. vaesenc xmm10, xmm10, xmm7
  11743. vaesenc xmm11, xmm11, xmm7
  11744. vaesenc xmm12, xmm12, xmm7
  11745. vaesenc xmm13, xmm13, xmm7
  11746. vaesenc xmm14, xmm14, xmm7
  11747. vaesenc xmm15, xmm15, xmm7
  11748. vmovdqu xmm7, OWORD PTR [rsi+48]
  11749. vaesenc xmm8, xmm8, xmm7
  11750. vaesenc xmm9, xmm9, xmm7
  11751. vaesenc xmm10, xmm10, xmm7
  11752. vaesenc xmm11, xmm11, xmm7
  11753. vaesenc xmm12, xmm12, xmm7
  11754. vaesenc xmm13, xmm13, xmm7
  11755. vaesenc xmm14, xmm14, xmm7
  11756. vaesenc xmm15, xmm15, xmm7
  11757. vmovdqu xmm7, OWORD PTR [rsi+64]
  11758. vaesenc xmm8, xmm8, xmm7
  11759. vaesenc xmm9, xmm9, xmm7
  11760. vaesenc xmm10, xmm10, xmm7
  11761. vaesenc xmm11, xmm11, xmm7
  11762. vaesenc xmm12, xmm12, xmm7
  11763. vaesenc xmm13, xmm13, xmm7
  11764. vaesenc xmm14, xmm14, xmm7
  11765. vaesenc xmm15, xmm15, xmm7
  11766. vmovdqu xmm7, OWORD PTR [rsi+80]
  11767. vaesenc xmm8, xmm8, xmm7
  11768. vaesenc xmm9, xmm9, xmm7
  11769. vaesenc xmm10, xmm10, xmm7
  11770. vaesenc xmm11, xmm11, xmm7
  11771. vaesenc xmm12, xmm12, xmm7
  11772. vaesenc xmm13, xmm13, xmm7
  11773. vaesenc xmm14, xmm14, xmm7
  11774. vaesenc xmm15, xmm15, xmm7
  11775. vmovdqu xmm7, OWORD PTR [rsi+96]
  11776. vaesenc xmm8, xmm8, xmm7
  11777. vaesenc xmm9, xmm9, xmm7
  11778. vaesenc xmm10, xmm10, xmm7
  11779. vaesenc xmm11, xmm11, xmm7
  11780. vaesenc xmm12, xmm12, xmm7
  11781. vaesenc xmm13, xmm13, xmm7
  11782. vaesenc xmm14, xmm14, xmm7
  11783. vaesenc xmm15, xmm15, xmm7
  11784. vmovdqu xmm7, OWORD PTR [rsi+112]
  11785. vaesenc xmm8, xmm8, xmm7
  11786. vaesenc xmm9, xmm9, xmm7
  11787. vaesenc xmm10, xmm10, xmm7
  11788. vaesenc xmm11, xmm11, xmm7
  11789. vaesenc xmm12, xmm12, xmm7
  11790. vaesenc xmm13, xmm13, xmm7
  11791. vaesenc xmm14, xmm14, xmm7
  11792. vaesenc xmm15, xmm15, xmm7
  11793. vmovdqu xmm7, OWORD PTR [rsi+128]
  11794. vaesenc xmm8, xmm8, xmm7
  11795. vaesenc xmm9, xmm9, xmm7
  11796. vaesenc xmm10, xmm10, xmm7
  11797. vaesenc xmm11, xmm11, xmm7
  11798. vaesenc xmm12, xmm12, xmm7
  11799. vaesenc xmm13, xmm13, xmm7
  11800. vaesenc xmm14, xmm14, xmm7
  11801. vaesenc xmm15, xmm15, xmm7
  11802. vmovdqu xmm7, OWORD PTR [rsi+144]
  11803. vaesenc xmm8, xmm8, xmm7
  11804. vaesenc xmm9, xmm9, xmm7
  11805. vaesenc xmm10, xmm10, xmm7
  11806. vaesenc xmm11, xmm11, xmm7
  11807. vaesenc xmm12, xmm12, xmm7
  11808. vaesenc xmm13, xmm13, xmm7
  11809. vaesenc xmm14, xmm14, xmm7
  11810. vaesenc xmm15, xmm15, xmm7
  11811. cmp r9d, 11
  11812. vmovdqu xmm7, OWORD PTR [rsi+160]
  11813. jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
  11814. vaesenc xmm8, xmm8, xmm7
  11815. vaesenc xmm9, xmm9, xmm7
  11816. vaesenc xmm10, xmm10, xmm7
  11817. vaesenc xmm11, xmm11, xmm7
  11818. vaesenc xmm12, xmm12, xmm7
  11819. vaesenc xmm13, xmm13, xmm7
  11820. vaesenc xmm14, xmm14, xmm7
  11821. vaesenc xmm15, xmm15, xmm7
  11822. vmovdqu xmm7, OWORD PTR [rsi+176]
  11823. vaesenc xmm8, xmm8, xmm7
  11824. vaesenc xmm9, xmm9, xmm7
  11825. vaesenc xmm10, xmm10, xmm7
  11826. vaesenc xmm11, xmm11, xmm7
  11827. vaesenc xmm12, xmm12, xmm7
  11828. vaesenc xmm13, xmm13, xmm7
  11829. vaesenc xmm14, xmm14, xmm7
  11830. vaesenc xmm15, xmm15, xmm7
  11831. cmp r9d, 13
  11832. vmovdqu xmm7, OWORD PTR [rsi+192]
  11833. jl L_AES_GCM_encrypt_avx2_aesenc_128_enc_done
  11834. vaesenc xmm8, xmm8, xmm7
  11835. vaesenc xmm9, xmm9, xmm7
  11836. vaesenc xmm10, xmm10, xmm7
  11837. vaesenc xmm11, xmm11, xmm7
  11838. vaesenc xmm12, xmm12, xmm7
  11839. vaesenc xmm13, xmm13, xmm7
  11840. vaesenc xmm14, xmm14, xmm7
  11841. vaesenc xmm15, xmm15, xmm7
  11842. vmovdqu xmm7, OWORD PTR [rsi+208]
  11843. vaesenc xmm8, xmm8, xmm7
  11844. vaesenc xmm9, xmm9, xmm7
  11845. vaesenc xmm10, xmm10, xmm7
  11846. vaesenc xmm11, xmm11, xmm7
  11847. vaesenc xmm12, xmm12, xmm7
  11848. vaesenc xmm13, xmm13, xmm7
  11849. vaesenc xmm14, xmm14, xmm7
  11850. vaesenc xmm15, xmm15, xmm7
  11851. vmovdqu xmm7, OWORD PTR [rsi+224]
  11852. L_AES_GCM_encrypt_avx2_aesenc_128_enc_done:
  11853. ; aesenc_last
  11854. vaesenclast xmm8, xmm8, xmm7
  11855. vaesenclast xmm9, xmm9, xmm7
  11856. vaesenclast xmm10, xmm10, xmm7
  11857. vaesenclast xmm11, xmm11, xmm7
  11858. vmovdqu xmm0, OWORD PTR [rdi]
  11859. vmovdqu xmm1, OWORD PTR [rdi+16]
  11860. vmovdqu xmm2, OWORD PTR [rdi+32]
  11861. vmovdqu xmm3, OWORD PTR [rdi+48]
  11862. vpxor xmm8, xmm8, xmm0
  11863. vpxor xmm9, xmm9, xmm1
  11864. vpxor xmm10, xmm10, xmm2
  11865. vpxor xmm11, xmm11, xmm3
  11866. vmovdqu OWORD PTR [r8], xmm8
  11867. vmovdqu OWORD PTR [r8+16], xmm9
  11868. vmovdqu OWORD PTR [r8+32], xmm10
  11869. vmovdqu OWORD PTR [r8+48], xmm11
  11870. vaesenclast xmm12, xmm12, xmm7
  11871. vaesenclast xmm13, xmm13, xmm7
  11872. vaesenclast xmm14, xmm14, xmm7
  11873. vaesenclast xmm15, xmm15, xmm7
  11874. vmovdqu xmm0, OWORD PTR [rdi+64]
  11875. vmovdqu xmm1, OWORD PTR [rdi+80]
  11876. vmovdqu xmm2, OWORD PTR [rdi+96]
  11877. vmovdqu xmm3, OWORD PTR [rdi+112]
  11878. vpxor xmm12, xmm12, xmm0
  11879. vpxor xmm13, xmm13, xmm1
  11880. vpxor xmm14, xmm14, xmm2
  11881. vpxor xmm15, xmm15, xmm3
  11882. vmovdqu OWORD PTR [r8+64], xmm12
  11883. vmovdqu OWORD PTR [r8+80], xmm13
  11884. vmovdqu OWORD PTR [r8+96], xmm14
  11885. vmovdqu OWORD PTR [r8+112], xmm15
  11886. cmp r13d, 128
  11887. mov ebx, 128
  11888. jle L_AES_GCM_encrypt_avx2_end_128
  11889. ; More 128 bytes of input
  11890. L_AES_GCM_encrypt_avx2_ghash_128:
  11891. ; aesenc_128_ghash
  11892. lea rcx, QWORD PTR [rdi+rbx]
  11893. lea rdx, QWORD PTR [r8+rbx]
  11894. ; aesenc_ctr
  11895. vmovdqu xmm0, OWORD PTR [rsp+128]
  11896. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  11897. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  11898. vpshufb xmm8, xmm0, xmm1
  11899. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  11900. vpshufb xmm9, xmm9, xmm1
  11901. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  11902. vpshufb xmm10, xmm10, xmm1
  11903. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  11904. vpshufb xmm11, xmm11, xmm1
  11905. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  11906. vpshufb xmm12, xmm12, xmm1
  11907. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  11908. vpshufb xmm13, xmm13, xmm1
  11909. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  11910. vpshufb xmm14, xmm14, xmm1
  11911. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  11912. vpshufb xmm15, xmm15, xmm1
  11913. ; aesenc_xor
  11914. vmovdqu xmm7, OWORD PTR [rsi]
  11915. vmovdqu OWORD PTR [rsp+128], xmm0
  11916. vpxor xmm8, xmm8, xmm7
  11917. vpxor xmm9, xmm9, xmm7
  11918. vpxor xmm10, xmm10, xmm7
  11919. vpxor xmm11, xmm11, xmm7
  11920. vpxor xmm12, xmm12, xmm7
  11921. vpxor xmm13, xmm13, xmm7
  11922. vpxor xmm14, xmm14, xmm7
  11923. vpxor xmm15, xmm15, xmm7
  11924. ; aesenc_pclmul_1
  11925. vmovdqu xmm1, OWORD PTR [rdx+-128]
  11926. vmovdqu xmm0, OWORD PTR [rsi+16]
  11927. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11928. vmovdqu xmm2, OWORD PTR [rsp+112]
  11929. vpxor xmm1, xmm1, xmm6
  11930. vpclmulqdq xmm5, xmm1, xmm2, 16
  11931. vpclmulqdq xmm3, xmm1, xmm2, 1
  11932. vpclmulqdq xmm6, xmm1, xmm2, 0
  11933. vpclmulqdq xmm7, xmm1, xmm2, 17
  11934. vaesenc xmm8, xmm8, xmm0
  11935. vaesenc xmm9, xmm9, xmm0
  11936. vaesenc xmm10, xmm10, xmm0
  11937. vaesenc xmm11, xmm11, xmm0
  11938. vaesenc xmm12, xmm12, xmm0
  11939. vaesenc xmm13, xmm13, xmm0
  11940. vaesenc xmm14, xmm14, xmm0
  11941. vaesenc xmm15, xmm15, xmm0
  11942. ; aesenc_pclmul_2
  11943. vmovdqu xmm1, OWORD PTR [rdx+-112]
  11944. vmovdqu xmm0, OWORD PTR [rsp+96]
  11945. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11946. vpxor xmm5, xmm5, xmm3
  11947. vpclmulqdq xmm2, xmm1, xmm0, 16
  11948. vpclmulqdq xmm3, xmm1, xmm0, 1
  11949. vpclmulqdq xmm4, xmm1, xmm0, 0
  11950. vpclmulqdq xmm1, xmm1, xmm0, 17
  11951. vmovdqu xmm0, OWORD PTR [rsi+32]
  11952. vpxor xmm7, xmm7, xmm1
  11953. vaesenc xmm8, xmm8, xmm0
  11954. vaesenc xmm9, xmm9, xmm0
  11955. vaesenc xmm10, xmm10, xmm0
  11956. vaesenc xmm11, xmm11, xmm0
  11957. vaesenc xmm12, xmm12, xmm0
  11958. vaesenc xmm13, xmm13, xmm0
  11959. vaesenc xmm14, xmm14, xmm0
  11960. vaesenc xmm15, xmm15, xmm0
  11961. ; aesenc_pclmul_n
  11962. vmovdqu xmm1, OWORD PTR [rdx+-96]
  11963. vmovdqu xmm0, OWORD PTR [rsp+80]
  11964. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11965. vpxor xmm5, xmm5, xmm2
  11966. vpclmulqdq xmm2, xmm1, xmm0, 16
  11967. vpxor xmm5, xmm5, xmm3
  11968. vpclmulqdq xmm3, xmm1, xmm0, 1
  11969. vpxor xmm6, xmm6, xmm4
  11970. vpclmulqdq xmm4, xmm1, xmm0, 0
  11971. vpclmulqdq xmm1, xmm1, xmm0, 17
  11972. vmovdqu xmm0, OWORD PTR [rsi+48]
  11973. vpxor xmm7, xmm7, xmm1
  11974. vaesenc xmm8, xmm8, xmm0
  11975. vaesenc xmm9, xmm9, xmm0
  11976. vaesenc xmm10, xmm10, xmm0
  11977. vaesenc xmm11, xmm11, xmm0
  11978. vaesenc xmm12, xmm12, xmm0
  11979. vaesenc xmm13, xmm13, xmm0
  11980. vaesenc xmm14, xmm14, xmm0
  11981. vaesenc xmm15, xmm15, xmm0
  11982. ; aesenc_pclmul_n
  11983. vmovdqu xmm1, OWORD PTR [rdx+-80]
  11984. vmovdqu xmm0, OWORD PTR [rsp+64]
  11985. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  11986. vpxor xmm5, xmm5, xmm2
  11987. vpclmulqdq xmm2, xmm1, xmm0, 16
  11988. vpxor xmm5, xmm5, xmm3
  11989. vpclmulqdq xmm3, xmm1, xmm0, 1
  11990. vpxor xmm6, xmm6, xmm4
  11991. vpclmulqdq xmm4, xmm1, xmm0, 0
  11992. vpclmulqdq xmm1, xmm1, xmm0, 17
  11993. vmovdqu xmm0, OWORD PTR [rsi+64]
  11994. vpxor xmm7, xmm7, xmm1
  11995. vaesenc xmm8, xmm8, xmm0
  11996. vaesenc xmm9, xmm9, xmm0
  11997. vaesenc xmm10, xmm10, xmm0
  11998. vaesenc xmm11, xmm11, xmm0
  11999. vaesenc xmm12, xmm12, xmm0
  12000. vaesenc xmm13, xmm13, xmm0
  12001. vaesenc xmm14, xmm14, xmm0
  12002. vaesenc xmm15, xmm15, xmm0
  12003. ; aesenc_pclmul_n
  12004. vmovdqu xmm1, OWORD PTR [rdx+-64]
  12005. vmovdqu xmm0, OWORD PTR [rsp+48]
  12006. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12007. vpxor xmm5, xmm5, xmm2
  12008. vpclmulqdq xmm2, xmm1, xmm0, 16
  12009. vpxor xmm5, xmm5, xmm3
  12010. vpclmulqdq xmm3, xmm1, xmm0, 1
  12011. vpxor xmm6, xmm6, xmm4
  12012. vpclmulqdq xmm4, xmm1, xmm0, 0
  12013. vpclmulqdq xmm1, xmm1, xmm0, 17
  12014. vmovdqu xmm0, OWORD PTR [rsi+80]
  12015. vpxor xmm7, xmm7, xmm1
  12016. vaesenc xmm8, xmm8, xmm0
  12017. vaesenc xmm9, xmm9, xmm0
  12018. vaesenc xmm10, xmm10, xmm0
  12019. vaesenc xmm11, xmm11, xmm0
  12020. vaesenc xmm12, xmm12, xmm0
  12021. vaesenc xmm13, xmm13, xmm0
  12022. vaesenc xmm14, xmm14, xmm0
  12023. vaesenc xmm15, xmm15, xmm0
  12024. ; aesenc_pclmul_n
  12025. vmovdqu xmm1, OWORD PTR [rdx+-48]
  12026. vmovdqu xmm0, OWORD PTR [rsp+32]
  12027. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12028. vpxor xmm5, xmm5, xmm2
  12029. vpclmulqdq xmm2, xmm1, xmm0, 16
  12030. vpxor xmm5, xmm5, xmm3
  12031. vpclmulqdq xmm3, xmm1, xmm0, 1
  12032. vpxor xmm6, xmm6, xmm4
  12033. vpclmulqdq xmm4, xmm1, xmm0, 0
  12034. vpclmulqdq xmm1, xmm1, xmm0, 17
  12035. vmovdqu xmm0, OWORD PTR [rsi+96]
  12036. vpxor xmm7, xmm7, xmm1
  12037. vaesenc xmm8, xmm8, xmm0
  12038. vaesenc xmm9, xmm9, xmm0
  12039. vaesenc xmm10, xmm10, xmm0
  12040. vaesenc xmm11, xmm11, xmm0
  12041. vaesenc xmm12, xmm12, xmm0
  12042. vaesenc xmm13, xmm13, xmm0
  12043. vaesenc xmm14, xmm14, xmm0
  12044. vaesenc xmm15, xmm15, xmm0
  12045. ; aesenc_pclmul_n
  12046. vmovdqu xmm1, OWORD PTR [rdx+-32]
  12047. vmovdqu xmm0, OWORD PTR [rsp+16]
  12048. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12049. vpxor xmm5, xmm5, xmm2
  12050. vpclmulqdq xmm2, xmm1, xmm0, 16
  12051. vpxor xmm5, xmm5, xmm3
  12052. vpclmulqdq xmm3, xmm1, xmm0, 1
  12053. vpxor xmm6, xmm6, xmm4
  12054. vpclmulqdq xmm4, xmm1, xmm0, 0
  12055. vpclmulqdq xmm1, xmm1, xmm0, 17
  12056. vmovdqu xmm0, OWORD PTR [rsi+112]
  12057. vpxor xmm7, xmm7, xmm1
  12058. vaesenc xmm8, xmm8, xmm0
  12059. vaesenc xmm9, xmm9, xmm0
  12060. vaesenc xmm10, xmm10, xmm0
  12061. vaesenc xmm11, xmm11, xmm0
  12062. vaesenc xmm12, xmm12, xmm0
  12063. vaesenc xmm13, xmm13, xmm0
  12064. vaesenc xmm14, xmm14, xmm0
  12065. vaesenc xmm15, xmm15, xmm0
  12066. ; aesenc_pclmul_n
  12067. vmovdqu xmm1, OWORD PTR [rdx+-16]
  12068. vmovdqu xmm0, OWORD PTR [rsp]
  12069. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12070. vpxor xmm5, xmm5, xmm2
  12071. vpclmulqdq xmm2, xmm1, xmm0, 16
  12072. vpxor xmm5, xmm5, xmm3
  12073. vpclmulqdq xmm3, xmm1, xmm0, 1
  12074. vpxor xmm6, xmm6, xmm4
  12075. vpclmulqdq xmm4, xmm1, xmm0, 0
  12076. vpclmulqdq xmm1, xmm1, xmm0, 17
  12077. vmovdqu xmm0, OWORD PTR [rsi+128]
  12078. vpxor xmm7, xmm7, xmm1
  12079. vaesenc xmm8, xmm8, xmm0
  12080. vaesenc xmm9, xmm9, xmm0
  12081. vaesenc xmm10, xmm10, xmm0
  12082. vaesenc xmm11, xmm11, xmm0
  12083. vaesenc xmm12, xmm12, xmm0
  12084. vaesenc xmm13, xmm13, xmm0
  12085. vaesenc xmm14, xmm14, xmm0
  12086. vaesenc xmm15, xmm15, xmm0
  12087. ; aesenc_pclmul_l
  12088. vpxor xmm5, xmm5, xmm2
  12089. vpxor xmm6, xmm6, xmm4
  12090. vpxor xmm5, xmm5, xmm3
  12091. vpslldq xmm1, xmm5, 8
  12092. vpsrldq xmm5, xmm5, 8
  12093. vmovdqu xmm4, OWORD PTR [rsi+144]
  12094. vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
  12095. vaesenc xmm8, xmm8, xmm4
  12096. vpxor xmm6, xmm6, xmm1
  12097. vpxor xmm7, xmm7, xmm5
  12098. vpclmulqdq xmm3, xmm6, xmm0, 16
  12099. vaesenc xmm9, xmm9, xmm4
  12100. vaesenc xmm10, xmm10, xmm4
  12101. vaesenc xmm11, xmm11, xmm4
  12102. vpshufd xmm6, xmm6, 78
  12103. vpxor xmm6, xmm6, xmm3
  12104. vpclmulqdq xmm3, xmm6, xmm0, 16
  12105. vaesenc xmm12, xmm12, xmm4
  12106. vaesenc xmm13, xmm13, xmm4
  12107. vaesenc xmm14, xmm14, xmm4
  12108. vpshufd xmm6, xmm6, 78
  12109. vpxor xmm6, xmm6, xmm3
  12110. vpxor xmm6, xmm6, xmm7
  12111. vaesenc xmm15, xmm15, xmm4
  12112. cmp r9d, 11
  12113. vmovdqu xmm7, OWORD PTR [rsi+160]
  12114. jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
  12115. vaesenc xmm8, xmm8, xmm7
  12116. vaesenc xmm9, xmm9, xmm7
  12117. vaesenc xmm10, xmm10, xmm7
  12118. vaesenc xmm11, xmm11, xmm7
  12119. vaesenc xmm12, xmm12, xmm7
  12120. vaesenc xmm13, xmm13, xmm7
  12121. vaesenc xmm14, xmm14, xmm7
  12122. vaesenc xmm15, xmm15, xmm7
  12123. vmovdqu xmm7, OWORD PTR [rsi+176]
  12124. vaesenc xmm8, xmm8, xmm7
  12125. vaesenc xmm9, xmm9, xmm7
  12126. vaesenc xmm10, xmm10, xmm7
  12127. vaesenc xmm11, xmm11, xmm7
  12128. vaesenc xmm12, xmm12, xmm7
  12129. vaesenc xmm13, xmm13, xmm7
  12130. vaesenc xmm14, xmm14, xmm7
  12131. vaesenc xmm15, xmm15, xmm7
  12132. cmp r9d, 13
  12133. vmovdqu xmm7, OWORD PTR [rsi+192]
  12134. jl L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done
  12135. vaesenc xmm8, xmm8, xmm7
  12136. vaesenc xmm9, xmm9, xmm7
  12137. vaesenc xmm10, xmm10, xmm7
  12138. vaesenc xmm11, xmm11, xmm7
  12139. vaesenc xmm12, xmm12, xmm7
  12140. vaesenc xmm13, xmm13, xmm7
  12141. vaesenc xmm14, xmm14, xmm7
  12142. vaesenc xmm15, xmm15, xmm7
  12143. vmovdqu xmm7, OWORD PTR [rsi+208]
  12144. vaesenc xmm8, xmm8, xmm7
  12145. vaesenc xmm9, xmm9, xmm7
  12146. vaesenc xmm10, xmm10, xmm7
  12147. vaesenc xmm11, xmm11, xmm7
  12148. vaesenc xmm12, xmm12, xmm7
  12149. vaesenc xmm13, xmm13, xmm7
  12150. vaesenc xmm14, xmm14, xmm7
  12151. vaesenc xmm15, xmm15, xmm7
  12152. vmovdqu xmm7, OWORD PTR [rsi+224]
  12153. L_AES_GCM_encrypt_avx2_aesenc_128_ghash_avx_done:
  12154. ; aesenc_last
  12155. vaesenclast xmm8, xmm8, xmm7
  12156. vaesenclast xmm9, xmm9, xmm7
  12157. vaesenclast xmm10, xmm10, xmm7
  12158. vaesenclast xmm11, xmm11, xmm7
  12159. vmovdqu xmm0, OWORD PTR [rcx]
  12160. vmovdqu xmm1, OWORD PTR [rcx+16]
  12161. vmovdqu xmm2, OWORD PTR [rcx+32]
  12162. vmovdqu xmm3, OWORD PTR [rcx+48]
  12163. vpxor xmm8, xmm8, xmm0
  12164. vpxor xmm9, xmm9, xmm1
  12165. vpxor xmm10, xmm10, xmm2
  12166. vpxor xmm11, xmm11, xmm3
  12167. vmovdqu OWORD PTR [rdx], xmm8
  12168. vmovdqu OWORD PTR [rdx+16], xmm9
  12169. vmovdqu OWORD PTR [rdx+32], xmm10
  12170. vmovdqu OWORD PTR [rdx+48], xmm11
  12171. vaesenclast xmm12, xmm12, xmm7
  12172. vaesenclast xmm13, xmm13, xmm7
  12173. vaesenclast xmm14, xmm14, xmm7
  12174. vaesenclast xmm15, xmm15, xmm7
  12175. vmovdqu xmm0, OWORD PTR [rcx+64]
  12176. vmovdqu xmm1, OWORD PTR [rcx+80]
  12177. vmovdqu xmm2, OWORD PTR [rcx+96]
  12178. vmovdqu xmm3, OWORD PTR [rcx+112]
  12179. vpxor xmm12, xmm12, xmm0
  12180. vpxor xmm13, xmm13, xmm1
  12181. vpxor xmm14, xmm14, xmm2
  12182. vpxor xmm15, xmm15, xmm3
  12183. vmovdqu OWORD PTR [rdx+64], xmm12
  12184. vmovdqu OWORD PTR [rdx+80], xmm13
  12185. vmovdqu OWORD PTR [rdx+96], xmm14
  12186. vmovdqu OWORD PTR [rdx+112], xmm15
  12187. ; aesenc_128_ghash - end
  12188. add ebx, 128
  12189. cmp ebx, r13d
  12190. jl L_AES_GCM_encrypt_avx2_ghash_128
  12191. L_AES_GCM_encrypt_avx2_end_128:
  12192. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12193. vpshufb xmm8, xmm8, xmm4
  12194. vpshufb xmm9, xmm9, xmm4
  12195. vpshufb xmm10, xmm10, xmm4
  12196. vpshufb xmm11, xmm11, xmm4
  12197. vpshufb xmm12, xmm12, xmm4
  12198. vpshufb xmm13, xmm13, xmm4
  12199. vpshufb xmm14, xmm14, xmm4
  12200. vpshufb xmm15, xmm15, xmm4
  12201. vpxor xmm8, xmm8, xmm6
  12202. vmovdqu xmm7, OWORD PTR [rsp]
  12203. vpclmulqdq xmm5, xmm7, xmm15, 16
  12204. vpclmulqdq xmm1, xmm7, xmm15, 1
  12205. vpclmulqdq xmm4, xmm7, xmm15, 0
  12206. vpclmulqdq xmm6, xmm7, xmm15, 17
  12207. vpxor xmm5, xmm5, xmm1
  12208. vmovdqu xmm7, OWORD PTR [rsp+16]
  12209. vpclmulqdq xmm2, xmm7, xmm14, 16
  12210. vpclmulqdq xmm1, xmm7, xmm14, 1
  12211. vpclmulqdq xmm0, xmm7, xmm14, 0
  12212. vpclmulqdq xmm3, xmm7, xmm14, 17
  12213. vpxor xmm2, xmm2, xmm1
  12214. vpxor xmm6, xmm6, xmm3
  12215. vpxor xmm5, xmm5, xmm2
  12216. vpxor xmm4, xmm4, xmm0
  12217. vmovdqu xmm15, OWORD PTR [rsp+32]
  12218. vmovdqu xmm7, OWORD PTR [rsp+48]
  12219. vpclmulqdq xmm2, xmm15, xmm13, 16
  12220. vpclmulqdq xmm1, xmm15, xmm13, 1
  12221. vpclmulqdq xmm0, xmm15, xmm13, 0
  12222. vpclmulqdq xmm3, xmm15, xmm13, 17
  12223. vpxor xmm2, xmm2, xmm1
  12224. vpxor xmm6, xmm6, xmm3
  12225. vpxor xmm5, xmm5, xmm2
  12226. vpxor xmm4, xmm4, xmm0
  12227. vpclmulqdq xmm2, xmm7, xmm12, 16
  12228. vpclmulqdq xmm1, xmm7, xmm12, 1
  12229. vpclmulqdq xmm0, xmm7, xmm12, 0
  12230. vpclmulqdq xmm3, xmm7, xmm12, 17
  12231. vpxor xmm2, xmm2, xmm1
  12232. vpxor xmm6, xmm6, xmm3
  12233. vpxor xmm5, xmm5, xmm2
  12234. vpxor xmm4, xmm4, xmm0
  12235. vmovdqu xmm15, OWORD PTR [rsp+64]
  12236. vmovdqu xmm7, OWORD PTR [rsp+80]
  12237. vpclmulqdq xmm2, xmm15, xmm11, 16
  12238. vpclmulqdq xmm1, xmm15, xmm11, 1
  12239. vpclmulqdq xmm0, xmm15, xmm11, 0
  12240. vpclmulqdq xmm3, xmm15, xmm11, 17
  12241. vpxor xmm2, xmm2, xmm1
  12242. vpxor xmm6, xmm6, xmm3
  12243. vpxor xmm5, xmm5, xmm2
  12244. vpxor xmm4, xmm4, xmm0
  12245. vpclmulqdq xmm2, xmm7, xmm10, 16
  12246. vpclmulqdq xmm1, xmm7, xmm10, 1
  12247. vpclmulqdq xmm0, xmm7, xmm10, 0
  12248. vpclmulqdq xmm3, xmm7, xmm10, 17
  12249. vpxor xmm2, xmm2, xmm1
  12250. vpxor xmm6, xmm6, xmm3
  12251. vpxor xmm5, xmm5, xmm2
  12252. vpxor xmm4, xmm4, xmm0
  12253. vmovdqu xmm15, OWORD PTR [rsp+96]
  12254. vmovdqu xmm7, OWORD PTR [rsp+112]
  12255. vpclmulqdq xmm2, xmm15, xmm9, 16
  12256. vpclmulqdq xmm1, xmm15, xmm9, 1
  12257. vpclmulqdq xmm0, xmm15, xmm9, 0
  12258. vpclmulqdq xmm3, xmm15, xmm9, 17
  12259. vpxor xmm2, xmm2, xmm1
  12260. vpxor xmm6, xmm6, xmm3
  12261. vpxor xmm5, xmm5, xmm2
  12262. vpxor xmm4, xmm4, xmm0
  12263. vpclmulqdq xmm2, xmm7, xmm8, 16
  12264. vpclmulqdq xmm1, xmm7, xmm8, 1
  12265. vpclmulqdq xmm0, xmm7, xmm8, 0
  12266. vpclmulqdq xmm3, xmm7, xmm8, 17
  12267. vpxor xmm2, xmm2, xmm1
  12268. vpxor xmm6, xmm6, xmm3
  12269. vpxor xmm5, xmm5, xmm2
  12270. vpxor xmm4, xmm4, xmm0
  12271. vpslldq xmm7, xmm5, 8
  12272. vpsrldq xmm5, xmm5, 8
  12273. vpxor xmm4, xmm4, xmm7
  12274. vpxor xmm6, xmm6, xmm5
  12275. ; ghash_red
  12276. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12277. vpclmulqdq xmm0, xmm4, xmm2, 16
  12278. vpshufd xmm1, xmm4, 78
  12279. vpxor xmm1, xmm1, xmm0
  12280. vpclmulqdq xmm0, xmm1, xmm2, 16
  12281. vpshufd xmm1, xmm1, 78
  12282. vpxor xmm1, xmm1, xmm0
  12283. vpxor xmm6, xmm6, xmm1
  12284. vmovdqu xmm5, OWORD PTR [rsp]
  12285. vmovdqu xmm4, OWORD PTR [rsp+128]
  12286. vmovdqu xmm15, OWORD PTR [rsp+144]
  12287. L_AES_GCM_encrypt_avx2_done_128:
  12288. cmp ebx, r10d
  12289. je L_AES_GCM_encrypt_avx2_done_enc
  12290. mov r13d, r10d
  12291. and r13d, 4294967280
  12292. cmp ebx, r13d
  12293. jge L_AES_GCM_encrypt_avx2_last_block_done
  12294. ; aesenc_block
  12295. vmovdqu xmm1, xmm4
  12296. vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12297. vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
  12298. vpxor xmm0, xmm0, [rsi]
  12299. vmovdqu xmm2, OWORD PTR [rsi+16]
  12300. vaesenc xmm0, xmm0, xmm2
  12301. vmovdqu xmm2, OWORD PTR [rsi+32]
  12302. vaesenc xmm0, xmm0, xmm2
  12303. vmovdqu xmm2, OWORD PTR [rsi+48]
  12304. vaesenc xmm0, xmm0, xmm2
  12305. vmovdqu xmm2, OWORD PTR [rsi+64]
  12306. vaesenc xmm0, xmm0, xmm2
  12307. vmovdqu xmm2, OWORD PTR [rsi+80]
  12308. vaesenc xmm0, xmm0, xmm2
  12309. vmovdqu xmm2, OWORD PTR [rsi+96]
  12310. vaesenc xmm0, xmm0, xmm2
  12311. vmovdqu xmm2, OWORD PTR [rsi+112]
  12312. vaesenc xmm0, xmm0, xmm2
  12313. vmovdqu xmm2, OWORD PTR [rsi+128]
  12314. vaesenc xmm0, xmm0, xmm2
  12315. vmovdqu xmm2, OWORD PTR [rsi+144]
  12316. vaesenc xmm0, xmm0, xmm2
  12317. vmovdqu xmm4, xmm1
  12318. cmp r9d, 11
  12319. vmovdqu xmm1, OWORD PTR [rsi+160]
  12320. jl L_AES_GCM_encrypt_avx2_aesenc_block_last
  12321. vaesenc xmm0, xmm0, xmm1
  12322. vmovdqu xmm2, OWORD PTR [rsi+176]
  12323. vaesenc xmm0, xmm0, xmm2
  12324. cmp r9d, 13
  12325. vmovdqu xmm1, OWORD PTR [rsi+192]
  12326. jl L_AES_GCM_encrypt_avx2_aesenc_block_last
  12327. vaesenc xmm0, xmm0, xmm1
  12328. vmovdqu xmm2, OWORD PTR [rsi+208]
  12329. vaesenc xmm0, xmm0, xmm2
  12330. vmovdqu xmm1, OWORD PTR [rsi+224]
  12331. L_AES_GCM_encrypt_avx2_aesenc_block_last:
  12332. vaesenclast xmm0, xmm0, xmm1
  12333. vmovdqu xmm1, OWORD PTR [rdi+rbx]
  12334. vpxor xmm0, xmm0, xmm1
  12335. vmovdqu OWORD PTR [r8+rbx], xmm0
  12336. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12337. vpxor xmm6, xmm6, xmm0
  12338. add ebx, 16
  12339. cmp ebx, r13d
  12340. jge L_AES_GCM_encrypt_avx2_last_block_ghash
  12341. L_AES_GCM_encrypt_avx2_last_block_start:
  12342. vmovdqu xmm12, OWORD PTR [rdi+rbx]
  12343. vpshufb xmm11, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12344. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  12345. ; aesenc_gfmul_sb
  12346. vpclmulqdq xmm2, xmm6, xmm5, 1
  12347. vpclmulqdq xmm3, xmm6, xmm5, 16
  12348. vpclmulqdq xmm1, xmm6, xmm5, 0
  12349. vpclmulqdq xmm8, xmm6, xmm5, 17
  12350. vpxor xmm11, xmm11, [rsi]
  12351. vaesenc xmm11, xmm11, [rsi+16]
  12352. vpxor xmm3, xmm3, xmm2
  12353. vpslldq xmm2, xmm3, 8
  12354. vpsrldq xmm3, xmm3, 8
  12355. vaesenc xmm11, xmm11, [rsi+32]
  12356. vpxor xmm2, xmm2, xmm1
  12357. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12358. vaesenc xmm11, xmm11, [rsi+48]
  12359. vaesenc xmm11, xmm11, [rsi+64]
  12360. vaesenc xmm11, xmm11, [rsi+80]
  12361. vpshufd xmm2, xmm2, 78
  12362. vpxor xmm2, xmm2, xmm1
  12363. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12364. vaesenc xmm11, xmm11, [rsi+96]
  12365. vaesenc xmm11, xmm11, [rsi+112]
  12366. vaesenc xmm11, xmm11, [rsi+128]
  12367. vpshufd xmm2, xmm2, 78
  12368. vaesenc xmm11, xmm11, [rsi+144]
  12369. vpxor xmm8, xmm8, xmm3
  12370. vpxor xmm2, xmm2, xmm8
  12371. vmovdqu xmm0, OWORD PTR [rsi+160]
  12372. cmp r9d, 11
  12373. jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
  12374. vaesenc xmm11, xmm11, xmm0
  12375. vaesenc xmm11, xmm11, [rsi+176]
  12376. vmovdqu xmm0, OWORD PTR [rsi+192]
  12377. cmp r9d, 13
  12378. jl L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last
  12379. vaesenc xmm11, xmm11, xmm0
  12380. vaesenc xmm11, xmm11, [rsi+208]
  12381. vmovdqu xmm0, OWORD PTR [rsi+224]
  12382. L_AES_GCM_encrypt_avx2_aesenc_gfmul_sb_last:
  12383. vaesenclast xmm11, xmm11, xmm0
  12384. vpxor xmm6, xmm2, xmm1
  12385. vpxor xmm11, xmm11, xmm12
  12386. vmovdqu OWORD PTR [r8+rbx], xmm11
  12387. vpshufb xmm11, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12388. vpxor xmm6, xmm6, xmm11
  12389. add ebx, 16
  12390. cmp ebx, r13d
  12391. jl L_AES_GCM_encrypt_avx2_last_block_start
  12392. L_AES_GCM_encrypt_avx2_last_block_ghash:
  12393. ; ghash_gfmul_red
  12394. vpclmulqdq xmm10, xmm6, xmm5, 16
  12395. vpclmulqdq xmm9, xmm6, xmm5, 1
  12396. vpclmulqdq xmm8, xmm6, xmm5, 0
  12397. vpxor xmm10, xmm10, xmm9
  12398. vpslldq xmm9, xmm10, 8
  12399. vpsrldq xmm10, xmm10, 8
  12400. vpxor xmm9, xmm9, xmm8
  12401. vpclmulqdq xmm6, xmm6, xmm5, 17
  12402. vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12403. vpshufd xmm9, xmm9, 78
  12404. vpxor xmm9, xmm9, xmm8
  12405. vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12406. vpshufd xmm9, xmm9, 78
  12407. vpxor xmm6, xmm6, xmm10
  12408. vpxor xmm6, xmm6, xmm9
  12409. vpxor xmm6, xmm6, xmm8
  12410. L_AES_GCM_encrypt_avx2_last_block_done:
  12411. mov ecx, r10d
  12412. mov edx, r10d
  12413. and ecx, 15
  12414. jz L_AES_GCM_encrypt_avx2_done_enc
  12415. ; aesenc_last15_enc
  12416. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12417. vpxor xmm4, xmm4, [rsi]
  12418. vaesenc xmm4, xmm4, [rsi+16]
  12419. vaesenc xmm4, xmm4, [rsi+32]
  12420. vaesenc xmm4, xmm4, [rsi+48]
  12421. vaesenc xmm4, xmm4, [rsi+64]
  12422. vaesenc xmm4, xmm4, [rsi+80]
  12423. vaesenc xmm4, xmm4, [rsi+96]
  12424. vaesenc xmm4, xmm4, [rsi+112]
  12425. vaesenc xmm4, xmm4, [rsi+128]
  12426. vaesenc xmm4, xmm4, [rsi+144]
  12427. cmp r9d, 11
  12428. vmovdqu xmm0, OWORD PTR [rsi+160]
  12429. jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
  12430. vaesenc xmm4, xmm4, xmm0
  12431. vaesenc xmm4, xmm4, [rsi+176]
  12432. cmp r9d, 13
  12433. vmovdqu xmm0, OWORD PTR [rsi+192]
  12434. jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last
  12435. vaesenc xmm4, xmm4, xmm0
  12436. vaesenc xmm4, xmm4, [rsi+208]
  12437. vmovdqu xmm0, OWORD PTR [rsi+224]
  12438. L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_aesenc_avx_last:
  12439. vaesenclast xmm4, xmm4, xmm0
  12440. xor ecx, ecx
  12441. vpxor xmm0, xmm0, xmm0
  12442. vmovdqu OWORD PTR [rsp], xmm4
  12443. vmovdqu OWORD PTR [rsp+16], xmm0
  12444. L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop:
  12445. movzx r13d, BYTE PTR [rdi+rbx]
  12446. xor r13b, BYTE PTR [rsp+rcx]
  12447. mov BYTE PTR [rsp+rcx+16], r13b
  12448. mov BYTE PTR [r8+rbx], r13b
  12449. inc ebx
  12450. inc ecx
  12451. cmp ebx, edx
  12452. jl L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_loop
  12453. L_AES_GCM_encrypt_avx2_aesenc_last15_enc_avx_finish_enc:
  12454. vmovdqu xmm4, OWORD PTR [rsp+16]
  12455. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12456. vpxor xmm6, xmm6, xmm4
  12457. ; ghash_gfmul_red
  12458. vpclmulqdq xmm2, xmm6, xmm5, 16
  12459. vpclmulqdq xmm1, xmm6, xmm5, 1
  12460. vpclmulqdq xmm0, xmm6, xmm5, 0
  12461. vpxor xmm2, xmm2, xmm1
  12462. vpslldq xmm1, xmm2, 8
  12463. vpsrldq xmm2, xmm2, 8
  12464. vpxor xmm1, xmm1, xmm0
  12465. vpclmulqdq xmm6, xmm6, xmm5, 17
  12466. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12467. vpshufd xmm1, xmm1, 78
  12468. vpxor xmm1, xmm1, xmm0
  12469. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12470. vpshufd xmm1, xmm1, 78
  12471. vpxor xmm6, xmm6, xmm2
  12472. vpxor xmm6, xmm6, xmm1
  12473. vpxor xmm6, xmm6, xmm0
  12474. L_AES_GCM_encrypt_avx2_done_enc:
  12475. ; calc_tag
  12476. shl r10, 3
  12477. shl r11, 3
  12478. vmovq xmm0, r10
  12479. vmovq xmm1, r11
  12480. vpunpcklqdq xmm0, xmm0, xmm1
  12481. vpxor xmm0, xmm0, xmm6
  12482. ; ghash_gfmul_red
  12483. vpclmulqdq xmm4, xmm0, xmm5, 16
  12484. vpclmulqdq xmm3, xmm0, xmm5, 1
  12485. vpclmulqdq xmm2, xmm0, xmm5, 0
  12486. vpxor xmm4, xmm4, xmm3
  12487. vpslldq xmm3, xmm4, 8
  12488. vpsrldq xmm4, xmm4, 8
  12489. vpxor xmm3, xmm3, xmm2
  12490. vpclmulqdq xmm0, xmm0, xmm5, 17
  12491. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12492. vpshufd xmm3, xmm3, 78
  12493. vpxor xmm3, xmm3, xmm2
  12494. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  12495. vpshufd xmm3, xmm3, 78
  12496. vpxor xmm0, xmm0, xmm4
  12497. vpxor xmm0, xmm0, xmm3
  12498. vpxor xmm0, xmm0, xmm2
  12499. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12500. vpxor xmm0, xmm0, xmm15
  12501. ; store_tag
  12502. cmp r14d, 16
  12503. je L_AES_GCM_encrypt_avx2_store_tag_16
  12504. xor rcx, rcx
  12505. vmovdqu OWORD PTR [rsp], xmm0
  12506. L_AES_GCM_encrypt_avx2_store_tag_loop:
  12507. movzx r13d, BYTE PTR [rsp+rcx]
  12508. mov BYTE PTR [r15+rcx], r13b
  12509. inc ecx
  12510. cmp ecx, r14d
  12511. jne L_AES_GCM_encrypt_avx2_store_tag_loop
  12512. jmp L_AES_GCM_encrypt_avx2_store_tag_done
  12513. L_AES_GCM_encrypt_avx2_store_tag_16:
  12514. vmovdqu OWORD PTR [r15], xmm0
  12515. L_AES_GCM_encrypt_avx2_store_tag_done:
  12516. vzeroupper
  12517. add rsp, 160
  12518. pop rsi
  12519. pop r14
  12520. pop rbx
  12521. pop r15
  12522. pop r12
  12523. pop rdi
  12524. pop r13
  12525. ret
  12526. AES_GCM_encrypt_avx2 ENDP
  12527. _text ENDS
  12528. _text SEGMENT READONLY PARA
  12529. AES_GCM_decrypt_avx2 PROC
  12530. push r13
  12531. push rdi
  12532. push r12
  12533. push r14
  12534. push rbx
  12535. push r15
  12536. push rsi
  12537. push rbp
  12538. mov rdi, rcx
  12539. mov r12, r8
  12540. mov rax, r9
  12541. mov r14, QWORD PTR [rsp+104]
  12542. mov r8, rdx
  12543. mov r10d, DWORD PTR [rsp+112]
  12544. mov r11d, DWORD PTR [rsp+120]
  12545. mov ebx, DWORD PTR [rsp+128]
  12546. mov r15d, DWORD PTR [rsp+136]
  12547. mov rsi, QWORD PTR [rsp+144]
  12548. mov r9d, DWORD PTR [rsp+152]
  12549. mov rbp, QWORD PTR [rsp+160]
  12550. sub rsp, 168
  12551. vpxor xmm4, xmm4, xmm4
  12552. vpxor xmm6, xmm6, xmm6
  12553. mov edx, ebx
  12554. cmp edx, 12
  12555. je L_AES_GCM_decrypt_avx2_iv_12
  12556. ; Calculate values when IV is not 12 bytes
  12557. ; H = Encrypt X(=0)
  12558. vmovdqu xmm5, OWORD PTR [rsi]
  12559. vaesenc xmm5, xmm5, [rsi+16]
  12560. vaesenc xmm5, xmm5, [rsi+32]
  12561. vaesenc xmm5, xmm5, [rsi+48]
  12562. vaesenc xmm5, xmm5, [rsi+64]
  12563. vaesenc xmm5, xmm5, [rsi+80]
  12564. vaesenc xmm5, xmm5, [rsi+96]
  12565. vaesenc xmm5, xmm5, [rsi+112]
  12566. vaesenc xmm5, xmm5, [rsi+128]
  12567. vaesenc xmm5, xmm5, [rsi+144]
  12568. cmp r9d, 11
  12569. vmovdqu xmm0, OWORD PTR [rsi+160]
  12570. jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
  12571. vaesenc xmm5, xmm5, xmm0
  12572. vaesenc xmm5, xmm5, [rsi+176]
  12573. cmp r9d, 13
  12574. vmovdqu xmm0, OWORD PTR [rsi+192]
  12575. jl L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last
  12576. vaesenc xmm5, xmm5, xmm0
  12577. vaesenc xmm5, xmm5, [rsi+208]
  12578. vmovdqu xmm0, OWORD PTR [rsi+224]
  12579. L_AES_GCM_decrypt_avx2_calc_iv_1_aesenc_avx_last:
  12580. vaesenclast xmm5, xmm5, xmm0
  12581. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12582. ; Calc counter
  12583. ; Initialization vector
  12584. cmp edx, 0
  12585. mov rcx, 0
  12586. je L_AES_GCM_decrypt_avx2_calc_iv_done
  12587. cmp edx, 16
  12588. jl L_AES_GCM_decrypt_avx2_calc_iv_lt16
  12589. and edx, 4294967280
  12590. L_AES_GCM_decrypt_avx2_calc_iv_16_loop:
  12591. vmovdqu xmm0, OWORD PTR [rax+rcx]
  12592. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12593. vpxor xmm4, xmm4, xmm0
  12594. ; ghash_gfmul_avx
  12595. vpclmulqdq xmm2, xmm5, xmm4, 16
  12596. vpclmulqdq xmm1, xmm5, xmm4, 1
  12597. vpclmulqdq xmm0, xmm5, xmm4, 0
  12598. vpclmulqdq xmm3, xmm5, xmm4, 17
  12599. vpxor xmm2, xmm2, xmm1
  12600. vpslldq xmm1, xmm2, 8
  12601. vpsrldq xmm2, xmm2, 8
  12602. vpxor xmm7, xmm0, xmm1
  12603. vpxor xmm4, xmm3, xmm2
  12604. ; ghash_mid
  12605. vpsrld xmm0, xmm7, 31
  12606. vpsrld xmm1, xmm4, 31
  12607. vpslld xmm7, xmm7, 1
  12608. vpslld xmm4, xmm4, 1
  12609. vpsrldq xmm2, xmm0, 12
  12610. vpslldq xmm0, xmm0, 4
  12611. vpslldq xmm1, xmm1, 4
  12612. vpor xmm4, xmm4, xmm2
  12613. vpor xmm7, xmm7, xmm0
  12614. vpor xmm4, xmm4, xmm1
  12615. ; ghash_red
  12616. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12617. vpclmulqdq xmm0, xmm7, xmm2, 16
  12618. vpshufd xmm1, xmm7, 78
  12619. vpxor xmm1, xmm1, xmm0
  12620. vpclmulqdq xmm0, xmm1, xmm2, 16
  12621. vpshufd xmm1, xmm1, 78
  12622. vpxor xmm1, xmm1, xmm0
  12623. vpxor xmm4, xmm4, xmm1
  12624. add ecx, 16
  12625. cmp ecx, edx
  12626. jl L_AES_GCM_decrypt_avx2_calc_iv_16_loop
  12627. mov edx, ebx
  12628. cmp ecx, edx
  12629. je L_AES_GCM_decrypt_avx2_calc_iv_done
  12630. L_AES_GCM_decrypt_avx2_calc_iv_lt16:
  12631. vpxor xmm0, xmm0, xmm0
  12632. xor ebx, ebx
  12633. vmovdqu OWORD PTR [rsp], xmm0
  12634. L_AES_GCM_decrypt_avx2_calc_iv_loop:
  12635. movzx r13d, BYTE PTR [rax+rcx]
  12636. mov BYTE PTR [rsp+rbx], r13b
  12637. inc ecx
  12638. inc ebx
  12639. cmp ecx, edx
  12640. jl L_AES_GCM_decrypt_avx2_calc_iv_loop
  12641. vmovdqu xmm0, OWORD PTR [rsp]
  12642. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12643. vpxor xmm4, xmm4, xmm0
  12644. ; ghash_gfmul_avx
  12645. vpclmulqdq xmm2, xmm5, xmm4, 16
  12646. vpclmulqdq xmm1, xmm5, xmm4, 1
  12647. vpclmulqdq xmm0, xmm5, xmm4, 0
  12648. vpclmulqdq xmm3, xmm5, xmm4, 17
  12649. vpxor xmm2, xmm2, xmm1
  12650. vpslldq xmm1, xmm2, 8
  12651. vpsrldq xmm2, xmm2, 8
  12652. vpxor xmm7, xmm0, xmm1
  12653. vpxor xmm4, xmm3, xmm2
  12654. ; ghash_mid
  12655. vpsrld xmm0, xmm7, 31
  12656. vpsrld xmm1, xmm4, 31
  12657. vpslld xmm7, xmm7, 1
  12658. vpslld xmm4, xmm4, 1
  12659. vpsrldq xmm2, xmm0, 12
  12660. vpslldq xmm0, xmm0, 4
  12661. vpslldq xmm1, xmm1, 4
  12662. vpor xmm4, xmm4, xmm2
  12663. vpor xmm7, xmm7, xmm0
  12664. vpor xmm4, xmm4, xmm1
  12665. ; ghash_red
  12666. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12667. vpclmulqdq xmm0, xmm7, xmm2, 16
  12668. vpshufd xmm1, xmm7, 78
  12669. vpxor xmm1, xmm1, xmm0
  12670. vpclmulqdq xmm0, xmm1, xmm2, 16
  12671. vpshufd xmm1, xmm1, 78
  12672. vpxor xmm1, xmm1, xmm0
  12673. vpxor xmm4, xmm4, xmm1
  12674. L_AES_GCM_decrypt_avx2_calc_iv_done:
  12675. ; T = Encrypt counter
  12676. vpxor xmm0, xmm0, xmm0
  12677. shl edx, 3
  12678. vmovq xmm0, rdx
  12679. vpxor xmm4, xmm4, xmm0
  12680. ; ghash_gfmul_avx
  12681. vpclmulqdq xmm2, xmm5, xmm4, 16
  12682. vpclmulqdq xmm1, xmm5, xmm4, 1
  12683. vpclmulqdq xmm0, xmm5, xmm4, 0
  12684. vpclmulqdq xmm3, xmm5, xmm4, 17
  12685. vpxor xmm2, xmm2, xmm1
  12686. vpslldq xmm1, xmm2, 8
  12687. vpsrldq xmm2, xmm2, 8
  12688. vpxor xmm7, xmm0, xmm1
  12689. vpxor xmm4, xmm3, xmm2
  12690. ; ghash_mid
  12691. vpsrld xmm0, xmm7, 31
  12692. vpsrld xmm1, xmm4, 31
  12693. vpslld xmm7, xmm7, 1
  12694. vpslld xmm4, xmm4, 1
  12695. vpsrldq xmm2, xmm0, 12
  12696. vpslldq xmm0, xmm0, 4
  12697. vpslldq xmm1, xmm1, 4
  12698. vpor xmm4, xmm4, xmm2
  12699. vpor xmm7, xmm7, xmm0
  12700. vpor xmm4, xmm4, xmm1
  12701. ; ghash_red
  12702. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12703. vpclmulqdq xmm0, xmm7, xmm2, 16
  12704. vpshufd xmm1, xmm7, 78
  12705. vpxor xmm1, xmm1, xmm0
  12706. vpclmulqdq xmm0, xmm1, xmm2, 16
  12707. vpshufd xmm1, xmm1, 78
  12708. vpxor xmm1, xmm1, xmm0
  12709. vpxor xmm4, xmm4, xmm1
  12710. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12711. ; Encrypt counter
  12712. vmovdqu xmm15, OWORD PTR [rsi]
  12713. vpxor xmm15, xmm15, xmm4
  12714. vaesenc xmm15, xmm15, [rsi+16]
  12715. vaesenc xmm15, xmm15, [rsi+32]
  12716. vaesenc xmm15, xmm15, [rsi+48]
  12717. vaesenc xmm15, xmm15, [rsi+64]
  12718. vaesenc xmm15, xmm15, [rsi+80]
  12719. vaesenc xmm15, xmm15, [rsi+96]
  12720. vaesenc xmm15, xmm15, [rsi+112]
  12721. vaesenc xmm15, xmm15, [rsi+128]
  12722. vaesenc xmm15, xmm15, [rsi+144]
  12723. cmp r9d, 11
  12724. vmovdqu xmm0, OWORD PTR [rsi+160]
  12725. jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
  12726. vaesenc xmm15, xmm15, xmm0
  12727. vaesenc xmm15, xmm15, [rsi+176]
  12728. cmp r9d, 13
  12729. vmovdqu xmm0, OWORD PTR [rsi+192]
  12730. jl L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last
  12731. vaesenc xmm15, xmm15, xmm0
  12732. vaesenc xmm15, xmm15, [rsi+208]
  12733. vmovdqu xmm0, OWORD PTR [rsi+224]
  12734. L_AES_GCM_decrypt_avx2_calc_iv_2_aesenc_avx_last:
  12735. vaesenclast xmm15, xmm15, xmm0
  12736. jmp L_AES_GCM_decrypt_avx2_iv_done
  12737. L_AES_GCM_decrypt_avx2_iv_12:
  12738. ; # Calculate values when IV is 12 bytes
  12739. ; Set counter based on IV
  12740. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
  12741. vmovdqu xmm5, OWORD PTR [rsi]
  12742. vpblendd xmm4, xmm4, [rax], 7
  12743. ; H = Encrypt X(=0) and T = Encrypt counter
  12744. vmovdqu xmm7, OWORD PTR [rsi+16]
  12745. vpxor xmm15, xmm4, xmm5
  12746. vaesenc xmm5, xmm5, xmm7
  12747. vaesenc xmm15, xmm15, xmm7
  12748. vmovdqu xmm0, OWORD PTR [rsi+32]
  12749. vaesenc xmm5, xmm5, xmm0
  12750. vaesenc xmm15, xmm15, xmm0
  12751. vmovdqu xmm0, OWORD PTR [rsi+48]
  12752. vaesenc xmm5, xmm5, xmm0
  12753. vaesenc xmm15, xmm15, xmm0
  12754. vmovdqu xmm0, OWORD PTR [rsi+64]
  12755. vaesenc xmm5, xmm5, xmm0
  12756. vaesenc xmm15, xmm15, xmm0
  12757. vmovdqu xmm0, OWORD PTR [rsi+80]
  12758. vaesenc xmm5, xmm5, xmm0
  12759. vaesenc xmm15, xmm15, xmm0
  12760. vmovdqu xmm0, OWORD PTR [rsi+96]
  12761. vaesenc xmm5, xmm5, xmm0
  12762. vaesenc xmm15, xmm15, xmm0
  12763. vmovdqu xmm0, OWORD PTR [rsi+112]
  12764. vaesenc xmm5, xmm5, xmm0
  12765. vaesenc xmm15, xmm15, xmm0
  12766. vmovdqu xmm0, OWORD PTR [rsi+128]
  12767. vaesenc xmm5, xmm5, xmm0
  12768. vaesenc xmm15, xmm15, xmm0
  12769. vmovdqu xmm0, OWORD PTR [rsi+144]
  12770. vaesenc xmm5, xmm5, xmm0
  12771. vaesenc xmm15, xmm15, xmm0
  12772. cmp r9d, 11
  12773. vmovdqu xmm0, OWORD PTR [rsi+160]
  12774. jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
  12775. vaesenc xmm5, xmm5, xmm0
  12776. vaesenc xmm15, xmm15, xmm0
  12777. vmovdqu xmm0, OWORD PTR [rsi+176]
  12778. vaesenc xmm5, xmm5, xmm0
  12779. vaesenc xmm15, xmm15, xmm0
  12780. cmp r9d, 13
  12781. vmovdqu xmm0, OWORD PTR [rsi+192]
  12782. jl L_AES_GCM_decrypt_avx2_calc_iv_12_last
  12783. vaesenc xmm5, xmm5, xmm0
  12784. vaesenc xmm15, xmm15, xmm0
  12785. vmovdqu xmm0, OWORD PTR [rsi+208]
  12786. vaesenc xmm5, xmm5, xmm0
  12787. vaesenc xmm15, xmm15, xmm0
  12788. vmovdqu xmm0, OWORD PTR [rsi+224]
  12789. L_AES_GCM_decrypt_avx2_calc_iv_12_last:
  12790. vaesenclast xmm5, xmm5, xmm0
  12791. vaesenclast xmm15, xmm15, xmm0
  12792. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12793. L_AES_GCM_decrypt_avx2_iv_done:
  12794. ; Additional authentication data
  12795. mov edx, r11d
  12796. cmp edx, 0
  12797. je L_AES_GCM_decrypt_avx2_calc_aad_done
  12798. xor ecx, ecx
  12799. cmp edx, 16
  12800. jl L_AES_GCM_decrypt_avx2_calc_aad_lt16
  12801. and edx, 4294967280
  12802. L_AES_GCM_decrypt_avx2_calc_aad_16_loop:
  12803. vmovdqu xmm0, OWORD PTR [r12+rcx]
  12804. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12805. vpxor xmm6, xmm6, xmm0
  12806. ; ghash_gfmul_avx
  12807. vpclmulqdq xmm2, xmm5, xmm6, 16
  12808. vpclmulqdq xmm1, xmm5, xmm6, 1
  12809. vpclmulqdq xmm0, xmm5, xmm6, 0
  12810. vpclmulqdq xmm3, xmm5, xmm6, 17
  12811. vpxor xmm2, xmm2, xmm1
  12812. vpslldq xmm1, xmm2, 8
  12813. vpsrldq xmm2, xmm2, 8
  12814. vpxor xmm7, xmm0, xmm1
  12815. vpxor xmm6, xmm3, xmm2
  12816. ; ghash_mid
  12817. vpsrld xmm0, xmm7, 31
  12818. vpsrld xmm1, xmm6, 31
  12819. vpslld xmm7, xmm7, 1
  12820. vpslld xmm6, xmm6, 1
  12821. vpsrldq xmm2, xmm0, 12
  12822. vpslldq xmm0, xmm0, 4
  12823. vpslldq xmm1, xmm1, 4
  12824. vpor xmm6, xmm6, xmm2
  12825. vpor xmm7, xmm7, xmm0
  12826. vpor xmm6, xmm6, xmm1
  12827. ; ghash_red
  12828. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12829. vpclmulqdq xmm0, xmm7, xmm2, 16
  12830. vpshufd xmm1, xmm7, 78
  12831. vpxor xmm1, xmm1, xmm0
  12832. vpclmulqdq xmm0, xmm1, xmm2, 16
  12833. vpshufd xmm1, xmm1, 78
  12834. vpxor xmm1, xmm1, xmm0
  12835. vpxor xmm6, xmm6, xmm1
  12836. add ecx, 16
  12837. cmp ecx, edx
  12838. jl L_AES_GCM_decrypt_avx2_calc_aad_16_loop
  12839. mov edx, r11d
  12840. cmp ecx, edx
  12841. je L_AES_GCM_decrypt_avx2_calc_aad_done
  12842. L_AES_GCM_decrypt_avx2_calc_aad_lt16:
  12843. vpxor xmm0, xmm0, xmm0
  12844. xor ebx, ebx
  12845. vmovdqu OWORD PTR [rsp], xmm0
  12846. L_AES_GCM_decrypt_avx2_calc_aad_loop:
  12847. movzx r13d, BYTE PTR [r12+rcx]
  12848. mov BYTE PTR [rsp+rbx], r13b
  12849. inc ecx
  12850. inc ebx
  12851. cmp ecx, edx
  12852. jl L_AES_GCM_decrypt_avx2_calc_aad_loop
  12853. vmovdqu xmm0, OWORD PTR [rsp]
  12854. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  12855. vpxor xmm6, xmm6, xmm0
  12856. ; ghash_gfmul_avx
  12857. vpclmulqdq xmm2, xmm5, xmm6, 16
  12858. vpclmulqdq xmm1, xmm5, xmm6, 1
  12859. vpclmulqdq xmm0, xmm5, xmm6, 0
  12860. vpclmulqdq xmm3, xmm5, xmm6, 17
  12861. vpxor xmm2, xmm2, xmm1
  12862. vpslldq xmm1, xmm2, 8
  12863. vpsrldq xmm2, xmm2, 8
  12864. vpxor xmm7, xmm0, xmm1
  12865. vpxor xmm6, xmm3, xmm2
  12866. ; ghash_mid
  12867. vpsrld xmm0, xmm7, 31
  12868. vpsrld xmm1, xmm6, 31
  12869. vpslld xmm7, xmm7, 1
  12870. vpslld xmm6, xmm6, 1
  12871. vpsrldq xmm2, xmm0, 12
  12872. vpslldq xmm0, xmm0, 4
  12873. vpslldq xmm1, xmm1, 4
  12874. vpor xmm6, xmm6, xmm2
  12875. vpor xmm7, xmm7, xmm0
  12876. vpor xmm6, xmm6, xmm1
  12877. ; ghash_red
  12878. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  12879. vpclmulqdq xmm0, xmm7, xmm2, 16
  12880. vpshufd xmm1, xmm7, 78
  12881. vpxor xmm1, xmm1, xmm0
  12882. vpclmulqdq xmm0, xmm1, xmm2, 16
  12883. vpshufd xmm1, xmm1, 78
  12884. vpxor xmm1, xmm1, xmm0
  12885. vpxor xmm6, xmm6, xmm1
  12886. L_AES_GCM_decrypt_avx2_calc_aad_done:
  12887. ; Calculate counter and H
  12888. vpsrlq xmm1, xmm5, 63
  12889. vpsllq xmm0, xmm5, 1
  12890. vpslldq xmm1, xmm1, 8
  12891. vpor xmm0, xmm0, xmm1
  12892. vpshufd xmm5, xmm5, 255
  12893. vpsrad xmm5, xmm5, 31
  12894. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  12895. vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
  12896. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  12897. vpxor xmm5, xmm5, xmm0
  12898. xor ebx, ebx
  12899. cmp r10d, 128
  12900. mov r13d, r10d
  12901. jl L_AES_GCM_decrypt_avx2_done_128
  12902. and r13d, 4294967168
  12903. vmovdqu OWORD PTR [rsp+128], xmm4
  12904. vmovdqu OWORD PTR [rsp+144], xmm15
  12905. vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
  12906. ; H ^ 1 and H ^ 2
  12907. vpclmulqdq xmm9, xmm5, xmm5, 0
  12908. vpclmulqdq xmm10, xmm5, xmm5, 17
  12909. vpclmulqdq xmm8, xmm9, xmm3, 16
  12910. vpshufd xmm9, xmm9, 78
  12911. vpxor xmm9, xmm9, xmm8
  12912. vpclmulqdq xmm8, xmm9, xmm3, 16
  12913. vpshufd xmm9, xmm9, 78
  12914. vpxor xmm9, xmm9, xmm8
  12915. vpxor xmm0, xmm10, xmm9
  12916. vmovdqu OWORD PTR [rsp], xmm5
  12917. vmovdqu OWORD PTR [rsp+16], xmm0
  12918. ; H ^ 3 and H ^ 4
  12919. vpclmulqdq xmm11, xmm0, xmm5, 16
  12920. vpclmulqdq xmm10, xmm0, xmm5, 1
  12921. vpclmulqdq xmm9, xmm0, xmm5, 0
  12922. vpclmulqdq xmm12, xmm0, xmm5, 17
  12923. vpclmulqdq xmm13, xmm0, xmm0, 0
  12924. vpclmulqdq xmm14, xmm0, xmm0, 17
  12925. vpxor xmm11, xmm11, xmm10
  12926. vpslldq xmm10, xmm11, 8
  12927. vpsrldq xmm11, xmm11, 8
  12928. vpxor xmm10, xmm10, xmm9
  12929. vpclmulqdq xmm8, xmm13, xmm3, 16
  12930. vpclmulqdq xmm9, xmm10, xmm3, 16
  12931. vpshufd xmm10, xmm10, 78
  12932. vpshufd xmm13, xmm13, 78
  12933. vpxor xmm10, xmm10, xmm9
  12934. vpxor xmm13, xmm13, xmm8
  12935. vpclmulqdq xmm9, xmm10, xmm3, 16
  12936. vpclmulqdq xmm8, xmm13, xmm3, 16
  12937. vpshufd xmm10, xmm10, 78
  12938. vpshufd xmm13, xmm13, 78
  12939. vpxor xmm12, xmm12, xmm11
  12940. vpxor xmm13, xmm13, xmm8
  12941. vpxor xmm10, xmm10, xmm12
  12942. vpxor xmm2, xmm13, xmm14
  12943. vpxor xmm1, xmm10, xmm9
  12944. vmovdqu OWORD PTR [rsp+32], xmm1
  12945. vmovdqu OWORD PTR [rsp+48], xmm2
  12946. ; H ^ 5 and H ^ 6
  12947. vpclmulqdq xmm11, xmm1, xmm0, 16
  12948. vpclmulqdq xmm10, xmm1, xmm0, 1
  12949. vpclmulqdq xmm9, xmm1, xmm0, 0
  12950. vpclmulqdq xmm12, xmm1, xmm0, 17
  12951. vpclmulqdq xmm13, xmm1, xmm1, 0
  12952. vpclmulqdq xmm14, xmm1, xmm1, 17
  12953. vpxor xmm11, xmm11, xmm10
  12954. vpslldq xmm10, xmm11, 8
  12955. vpsrldq xmm11, xmm11, 8
  12956. vpxor xmm10, xmm10, xmm9
  12957. vpclmulqdq xmm8, xmm13, xmm3, 16
  12958. vpclmulqdq xmm9, xmm10, xmm3, 16
  12959. vpshufd xmm10, xmm10, 78
  12960. vpshufd xmm13, xmm13, 78
  12961. vpxor xmm10, xmm10, xmm9
  12962. vpxor xmm13, xmm13, xmm8
  12963. vpclmulqdq xmm9, xmm10, xmm3, 16
  12964. vpclmulqdq xmm8, xmm13, xmm3, 16
  12965. vpshufd xmm10, xmm10, 78
  12966. vpshufd xmm13, xmm13, 78
  12967. vpxor xmm12, xmm12, xmm11
  12968. vpxor xmm13, xmm13, xmm8
  12969. vpxor xmm10, xmm10, xmm12
  12970. vpxor xmm0, xmm13, xmm14
  12971. vpxor xmm7, xmm10, xmm9
  12972. vmovdqu OWORD PTR [rsp+64], xmm7
  12973. vmovdqu OWORD PTR [rsp+80], xmm0
  12974. ; H ^ 7 and H ^ 8
  12975. vpclmulqdq xmm11, xmm2, xmm1, 16
  12976. vpclmulqdq xmm10, xmm2, xmm1, 1
  12977. vpclmulqdq xmm9, xmm2, xmm1, 0
  12978. vpclmulqdq xmm12, xmm2, xmm1, 17
  12979. vpclmulqdq xmm13, xmm2, xmm2, 0
  12980. vpclmulqdq xmm14, xmm2, xmm2, 17
  12981. vpxor xmm11, xmm11, xmm10
  12982. vpslldq xmm10, xmm11, 8
  12983. vpsrldq xmm11, xmm11, 8
  12984. vpxor xmm10, xmm10, xmm9
  12985. vpclmulqdq xmm8, xmm13, xmm3, 16
  12986. vpclmulqdq xmm9, xmm10, xmm3, 16
  12987. vpshufd xmm10, xmm10, 78
  12988. vpshufd xmm13, xmm13, 78
  12989. vpxor xmm10, xmm10, xmm9
  12990. vpxor xmm13, xmm13, xmm8
  12991. vpclmulqdq xmm9, xmm10, xmm3, 16
  12992. vpclmulqdq xmm8, xmm13, xmm3, 16
  12993. vpshufd xmm10, xmm10, 78
  12994. vpshufd xmm13, xmm13, 78
  12995. vpxor xmm12, xmm12, xmm11
  12996. vpxor xmm13, xmm13, xmm8
  12997. vpxor xmm10, xmm10, xmm12
  12998. vpxor xmm0, xmm13, xmm14
  12999. vpxor xmm7, xmm10, xmm9
  13000. vmovdqu OWORD PTR [rsp+96], xmm7
  13001. vmovdqu OWORD PTR [rsp+112], xmm0
  13002. L_AES_GCM_decrypt_avx2_ghash_128:
  13003. ; aesenc_128_ghash
  13004. lea rcx, QWORD PTR [rdi+rbx]
  13005. lea rdx, QWORD PTR [r8+rbx]
  13006. ; aesenc_ctr
  13007. vmovdqu xmm0, OWORD PTR [rsp+128]
  13008. vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13009. vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
  13010. vpshufb xmm8, xmm0, xmm1
  13011. vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
  13012. vpshufb xmm9, xmm9, xmm1
  13013. vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
  13014. vpshufb xmm10, xmm10, xmm1
  13015. vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
  13016. vpshufb xmm11, xmm11, xmm1
  13017. vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
  13018. vpshufb xmm12, xmm12, xmm1
  13019. vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
  13020. vpshufb xmm13, xmm13, xmm1
  13021. vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
  13022. vpshufb xmm14, xmm14, xmm1
  13023. vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
  13024. vpshufb xmm15, xmm15, xmm1
  13025. ; aesenc_xor
  13026. vmovdqu xmm7, OWORD PTR [rsi]
  13027. vmovdqu OWORD PTR [rsp+128], xmm0
  13028. vpxor xmm8, xmm8, xmm7
  13029. vpxor xmm9, xmm9, xmm7
  13030. vpxor xmm10, xmm10, xmm7
  13031. vpxor xmm11, xmm11, xmm7
  13032. vpxor xmm12, xmm12, xmm7
  13033. vpxor xmm13, xmm13, xmm7
  13034. vpxor xmm14, xmm14, xmm7
  13035. vpxor xmm15, xmm15, xmm7
  13036. ; aesenc_pclmul_1
  13037. vmovdqu xmm1, OWORD PTR [rcx]
  13038. vmovdqu xmm0, OWORD PTR [rsi+16]
  13039. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13040. vmovdqu xmm2, OWORD PTR [rsp+112]
  13041. vpxor xmm1, xmm1, xmm6
  13042. vpclmulqdq xmm5, xmm1, xmm2, 16
  13043. vpclmulqdq xmm3, xmm1, xmm2, 1
  13044. vpclmulqdq xmm6, xmm1, xmm2, 0
  13045. vpclmulqdq xmm7, xmm1, xmm2, 17
  13046. vaesenc xmm8, xmm8, xmm0
  13047. vaesenc xmm9, xmm9, xmm0
  13048. vaesenc xmm10, xmm10, xmm0
  13049. vaesenc xmm11, xmm11, xmm0
  13050. vaesenc xmm12, xmm12, xmm0
  13051. vaesenc xmm13, xmm13, xmm0
  13052. vaesenc xmm14, xmm14, xmm0
  13053. vaesenc xmm15, xmm15, xmm0
  13054. ; aesenc_pclmul_2
  13055. vmovdqu xmm1, OWORD PTR [rcx+16]
  13056. vmovdqu xmm0, OWORD PTR [rsp+96]
  13057. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13058. vpxor xmm5, xmm5, xmm3
  13059. vpclmulqdq xmm2, xmm1, xmm0, 16
  13060. vpclmulqdq xmm3, xmm1, xmm0, 1
  13061. vpclmulqdq xmm4, xmm1, xmm0, 0
  13062. vpclmulqdq xmm1, xmm1, xmm0, 17
  13063. vmovdqu xmm0, OWORD PTR [rsi+32]
  13064. vpxor xmm7, xmm7, xmm1
  13065. vaesenc xmm8, xmm8, xmm0
  13066. vaesenc xmm9, xmm9, xmm0
  13067. vaesenc xmm10, xmm10, xmm0
  13068. vaesenc xmm11, xmm11, xmm0
  13069. vaesenc xmm12, xmm12, xmm0
  13070. vaesenc xmm13, xmm13, xmm0
  13071. vaesenc xmm14, xmm14, xmm0
  13072. vaesenc xmm15, xmm15, xmm0
  13073. ; aesenc_pclmul_n
  13074. vmovdqu xmm1, OWORD PTR [rcx+32]
  13075. vmovdqu xmm0, OWORD PTR [rsp+80]
  13076. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13077. vpxor xmm5, xmm5, xmm2
  13078. vpclmulqdq xmm2, xmm1, xmm0, 16
  13079. vpxor xmm5, xmm5, xmm3
  13080. vpclmulqdq xmm3, xmm1, xmm0, 1
  13081. vpxor xmm6, xmm6, xmm4
  13082. vpclmulqdq xmm4, xmm1, xmm0, 0
  13083. vpclmulqdq xmm1, xmm1, xmm0, 17
  13084. vmovdqu xmm0, OWORD PTR [rsi+48]
  13085. vpxor xmm7, xmm7, xmm1
  13086. vaesenc xmm8, xmm8, xmm0
  13087. vaesenc xmm9, xmm9, xmm0
  13088. vaesenc xmm10, xmm10, xmm0
  13089. vaesenc xmm11, xmm11, xmm0
  13090. vaesenc xmm12, xmm12, xmm0
  13091. vaesenc xmm13, xmm13, xmm0
  13092. vaesenc xmm14, xmm14, xmm0
  13093. vaesenc xmm15, xmm15, xmm0
  13094. ; aesenc_pclmul_n
  13095. vmovdqu xmm1, OWORD PTR [rcx+48]
  13096. vmovdqu xmm0, OWORD PTR [rsp+64]
  13097. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13098. vpxor xmm5, xmm5, xmm2
  13099. vpclmulqdq xmm2, xmm1, xmm0, 16
  13100. vpxor xmm5, xmm5, xmm3
  13101. vpclmulqdq xmm3, xmm1, xmm0, 1
  13102. vpxor xmm6, xmm6, xmm4
  13103. vpclmulqdq xmm4, xmm1, xmm0, 0
  13104. vpclmulqdq xmm1, xmm1, xmm0, 17
  13105. vmovdqu xmm0, OWORD PTR [rsi+64]
  13106. vpxor xmm7, xmm7, xmm1
  13107. vaesenc xmm8, xmm8, xmm0
  13108. vaesenc xmm9, xmm9, xmm0
  13109. vaesenc xmm10, xmm10, xmm0
  13110. vaesenc xmm11, xmm11, xmm0
  13111. vaesenc xmm12, xmm12, xmm0
  13112. vaesenc xmm13, xmm13, xmm0
  13113. vaesenc xmm14, xmm14, xmm0
  13114. vaesenc xmm15, xmm15, xmm0
  13115. ; aesenc_pclmul_n
  13116. vmovdqu xmm1, OWORD PTR [rcx+64]
  13117. vmovdqu xmm0, OWORD PTR [rsp+48]
  13118. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13119. vpxor xmm5, xmm5, xmm2
  13120. vpclmulqdq xmm2, xmm1, xmm0, 16
  13121. vpxor xmm5, xmm5, xmm3
  13122. vpclmulqdq xmm3, xmm1, xmm0, 1
  13123. vpxor xmm6, xmm6, xmm4
  13124. vpclmulqdq xmm4, xmm1, xmm0, 0
  13125. vpclmulqdq xmm1, xmm1, xmm0, 17
  13126. vmovdqu xmm0, OWORD PTR [rsi+80]
  13127. vpxor xmm7, xmm7, xmm1
  13128. vaesenc xmm8, xmm8, xmm0
  13129. vaesenc xmm9, xmm9, xmm0
  13130. vaesenc xmm10, xmm10, xmm0
  13131. vaesenc xmm11, xmm11, xmm0
  13132. vaesenc xmm12, xmm12, xmm0
  13133. vaesenc xmm13, xmm13, xmm0
  13134. vaesenc xmm14, xmm14, xmm0
  13135. vaesenc xmm15, xmm15, xmm0
  13136. ; aesenc_pclmul_n
  13137. vmovdqu xmm1, OWORD PTR [rcx+80]
  13138. vmovdqu xmm0, OWORD PTR [rsp+32]
  13139. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13140. vpxor xmm5, xmm5, xmm2
  13141. vpclmulqdq xmm2, xmm1, xmm0, 16
  13142. vpxor xmm5, xmm5, xmm3
  13143. vpclmulqdq xmm3, xmm1, xmm0, 1
  13144. vpxor xmm6, xmm6, xmm4
  13145. vpclmulqdq xmm4, xmm1, xmm0, 0
  13146. vpclmulqdq xmm1, xmm1, xmm0, 17
  13147. vmovdqu xmm0, OWORD PTR [rsi+96]
  13148. vpxor xmm7, xmm7, xmm1
  13149. vaesenc xmm8, xmm8, xmm0
  13150. vaesenc xmm9, xmm9, xmm0
  13151. vaesenc xmm10, xmm10, xmm0
  13152. vaesenc xmm11, xmm11, xmm0
  13153. vaesenc xmm12, xmm12, xmm0
  13154. vaesenc xmm13, xmm13, xmm0
  13155. vaesenc xmm14, xmm14, xmm0
  13156. vaesenc xmm15, xmm15, xmm0
  13157. ; aesenc_pclmul_n
  13158. vmovdqu xmm1, OWORD PTR [rcx+96]
  13159. vmovdqu xmm0, OWORD PTR [rsp+16]
  13160. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13161. vpxor xmm5, xmm5, xmm2
  13162. vpclmulqdq xmm2, xmm1, xmm0, 16
  13163. vpxor xmm5, xmm5, xmm3
  13164. vpclmulqdq xmm3, xmm1, xmm0, 1
  13165. vpxor xmm6, xmm6, xmm4
  13166. vpclmulqdq xmm4, xmm1, xmm0, 0
  13167. vpclmulqdq xmm1, xmm1, xmm0, 17
  13168. vmovdqu xmm0, OWORD PTR [rsi+112]
  13169. vpxor xmm7, xmm7, xmm1
  13170. vaesenc xmm8, xmm8, xmm0
  13171. vaesenc xmm9, xmm9, xmm0
  13172. vaesenc xmm10, xmm10, xmm0
  13173. vaesenc xmm11, xmm11, xmm0
  13174. vaesenc xmm12, xmm12, xmm0
  13175. vaesenc xmm13, xmm13, xmm0
  13176. vaesenc xmm14, xmm14, xmm0
  13177. vaesenc xmm15, xmm15, xmm0
  13178. ; aesenc_pclmul_n
  13179. vmovdqu xmm1, OWORD PTR [rcx+112]
  13180. vmovdqu xmm0, OWORD PTR [rsp]
  13181. vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13182. vpxor xmm5, xmm5, xmm2
  13183. vpclmulqdq xmm2, xmm1, xmm0, 16
  13184. vpxor xmm5, xmm5, xmm3
  13185. vpclmulqdq xmm3, xmm1, xmm0, 1
  13186. vpxor xmm6, xmm6, xmm4
  13187. vpclmulqdq xmm4, xmm1, xmm0, 0
  13188. vpclmulqdq xmm1, xmm1, xmm0, 17
  13189. vmovdqu xmm0, OWORD PTR [rsi+128]
  13190. vpxor xmm7, xmm7, xmm1
  13191. vaesenc xmm8, xmm8, xmm0
  13192. vaesenc xmm9, xmm9, xmm0
  13193. vaesenc xmm10, xmm10, xmm0
  13194. vaesenc xmm11, xmm11, xmm0
  13195. vaesenc xmm12, xmm12, xmm0
  13196. vaesenc xmm13, xmm13, xmm0
  13197. vaesenc xmm14, xmm14, xmm0
  13198. vaesenc xmm15, xmm15, xmm0
  13199. ; aesenc_pclmul_l
  13200. vpxor xmm5, xmm5, xmm2
  13201. vpxor xmm6, xmm6, xmm4
  13202. vpxor xmm5, xmm5, xmm3
  13203. vpslldq xmm1, xmm5, 8
  13204. vpsrldq xmm5, xmm5, 8
  13205. vmovdqu xmm4, OWORD PTR [rsi+144]
  13206. vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
  13207. vaesenc xmm8, xmm8, xmm4
  13208. vpxor xmm6, xmm6, xmm1
  13209. vpxor xmm7, xmm7, xmm5
  13210. vpclmulqdq xmm3, xmm6, xmm0, 16
  13211. vaesenc xmm9, xmm9, xmm4
  13212. vaesenc xmm10, xmm10, xmm4
  13213. vaesenc xmm11, xmm11, xmm4
  13214. vpshufd xmm6, xmm6, 78
  13215. vpxor xmm6, xmm6, xmm3
  13216. vpclmulqdq xmm3, xmm6, xmm0, 16
  13217. vaesenc xmm12, xmm12, xmm4
  13218. vaesenc xmm13, xmm13, xmm4
  13219. vaesenc xmm14, xmm14, xmm4
  13220. vpshufd xmm6, xmm6, 78
  13221. vpxor xmm6, xmm6, xmm3
  13222. vpxor xmm6, xmm6, xmm7
  13223. vaesenc xmm15, xmm15, xmm4
  13224. cmp r9d, 11
  13225. vmovdqu xmm7, OWORD PTR [rsi+160]
  13226. jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
  13227. vaesenc xmm8, xmm8, xmm7
  13228. vaesenc xmm9, xmm9, xmm7
  13229. vaesenc xmm10, xmm10, xmm7
  13230. vaesenc xmm11, xmm11, xmm7
  13231. vaesenc xmm12, xmm12, xmm7
  13232. vaesenc xmm13, xmm13, xmm7
  13233. vaesenc xmm14, xmm14, xmm7
  13234. vaesenc xmm15, xmm15, xmm7
  13235. vmovdqu xmm7, OWORD PTR [rsi+176]
  13236. vaesenc xmm8, xmm8, xmm7
  13237. vaesenc xmm9, xmm9, xmm7
  13238. vaesenc xmm10, xmm10, xmm7
  13239. vaesenc xmm11, xmm11, xmm7
  13240. vaesenc xmm12, xmm12, xmm7
  13241. vaesenc xmm13, xmm13, xmm7
  13242. vaesenc xmm14, xmm14, xmm7
  13243. vaesenc xmm15, xmm15, xmm7
  13244. cmp r9d, 13
  13245. vmovdqu xmm7, OWORD PTR [rsi+192]
  13246. jl L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done
  13247. vaesenc xmm8, xmm8, xmm7
  13248. vaesenc xmm9, xmm9, xmm7
  13249. vaesenc xmm10, xmm10, xmm7
  13250. vaesenc xmm11, xmm11, xmm7
  13251. vaesenc xmm12, xmm12, xmm7
  13252. vaesenc xmm13, xmm13, xmm7
  13253. vaesenc xmm14, xmm14, xmm7
  13254. vaesenc xmm15, xmm15, xmm7
  13255. vmovdqu xmm7, OWORD PTR [rsi+208]
  13256. vaesenc xmm8, xmm8, xmm7
  13257. vaesenc xmm9, xmm9, xmm7
  13258. vaesenc xmm10, xmm10, xmm7
  13259. vaesenc xmm11, xmm11, xmm7
  13260. vaesenc xmm12, xmm12, xmm7
  13261. vaesenc xmm13, xmm13, xmm7
  13262. vaesenc xmm14, xmm14, xmm7
  13263. vaesenc xmm15, xmm15, xmm7
  13264. vmovdqu xmm7, OWORD PTR [rsi+224]
  13265. L_AES_GCM_decrypt_avx2_aesenc_128_ghash_avx_done:
  13266. ; aesenc_last
  13267. vaesenclast xmm8, xmm8, xmm7
  13268. vaesenclast xmm9, xmm9, xmm7
  13269. vaesenclast xmm10, xmm10, xmm7
  13270. vaesenclast xmm11, xmm11, xmm7
  13271. vmovdqu xmm0, OWORD PTR [rcx]
  13272. vmovdqu xmm1, OWORD PTR [rcx+16]
  13273. vmovdqu xmm2, OWORD PTR [rcx+32]
  13274. vmovdqu xmm3, OWORD PTR [rcx+48]
  13275. vpxor xmm8, xmm8, xmm0
  13276. vpxor xmm9, xmm9, xmm1
  13277. vpxor xmm10, xmm10, xmm2
  13278. vpxor xmm11, xmm11, xmm3
  13279. vmovdqu OWORD PTR [rdx], xmm8
  13280. vmovdqu OWORD PTR [rdx+16], xmm9
  13281. vmovdqu OWORD PTR [rdx+32], xmm10
  13282. vmovdqu OWORD PTR [rdx+48], xmm11
  13283. vaesenclast xmm12, xmm12, xmm7
  13284. vaesenclast xmm13, xmm13, xmm7
  13285. vaesenclast xmm14, xmm14, xmm7
  13286. vaesenclast xmm15, xmm15, xmm7
  13287. vmovdqu xmm0, OWORD PTR [rcx+64]
  13288. vmovdqu xmm1, OWORD PTR [rcx+80]
  13289. vmovdqu xmm2, OWORD PTR [rcx+96]
  13290. vmovdqu xmm3, OWORD PTR [rcx+112]
  13291. vpxor xmm12, xmm12, xmm0
  13292. vpxor xmm13, xmm13, xmm1
  13293. vpxor xmm14, xmm14, xmm2
  13294. vpxor xmm15, xmm15, xmm3
  13295. vmovdqu OWORD PTR [rdx+64], xmm12
  13296. vmovdqu OWORD PTR [rdx+80], xmm13
  13297. vmovdqu OWORD PTR [rdx+96], xmm14
  13298. vmovdqu OWORD PTR [rdx+112], xmm15
  13299. ; aesenc_128_ghash - end
  13300. add ebx, 128
  13301. cmp ebx, r13d
  13302. jl L_AES_GCM_decrypt_avx2_ghash_128
  13303. vmovdqu xmm5, OWORD PTR [rsp]
  13304. vmovdqu xmm4, OWORD PTR [rsp+128]
  13305. vmovdqu xmm15, OWORD PTR [rsp+144]
  13306. L_AES_GCM_decrypt_avx2_done_128:
  13307. cmp ebx, r10d
  13308. jge L_AES_GCM_decrypt_avx2_done_dec
  13309. mov r13d, r10d
  13310. and r13d, 4294967280
  13311. cmp ebx, r13d
  13312. jge L_AES_GCM_decrypt_avx2_last_block_done
  13313. L_AES_GCM_decrypt_avx2_last_block_start:
  13314. vmovdqu xmm11, OWORD PTR [rdi+rbx]
  13315. vpshufb xmm10, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13316. vpshufb xmm12, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13317. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  13318. vpxor xmm12, xmm12, xmm6
  13319. ; aesenc_gfmul_sb
  13320. vpclmulqdq xmm2, xmm12, xmm5, 1
  13321. vpclmulqdq xmm3, xmm12, xmm5, 16
  13322. vpclmulqdq xmm1, xmm12, xmm5, 0
  13323. vpclmulqdq xmm8, xmm12, xmm5, 17
  13324. vpxor xmm10, xmm10, [rsi]
  13325. vaesenc xmm10, xmm10, [rsi+16]
  13326. vpxor xmm3, xmm3, xmm2
  13327. vpslldq xmm2, xmm3, 8
  13328. vpsrldq xmm3, xmm3, 8
  13329. vaesenc xmm10, xmm10, [rsi+32]
  13330. vpxor xmm2, xmm2, xmm1
  13331. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13332. vaesenc xmm10, xmm10, [rsi+48]
  13333. vaesenc xmm10, xmm10, [rsi+64]
  13334. vaesenc xmm10, xmm10, [rsi+80]
  13335. vpshufd xmm2, xmm2, 78
  13336. vpxor xmm2, xmm2, xmm1
  13337. vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13338. vaesenc xmm10, xmm10, [rsi+96]
  13339. vaesenc xmm10, xmm10, [rsi+112]
  13340. vaesenc xmm10, xmm10, [rsi+128]
  13341. vpshufd xmm2, xmm2, 78
  13342. vaesenc xmm10, xmm10, [rsi+144]
  13343. vpxor xmm8, xmm8, xmm3
  13344. vpxor xmm2, xmm2, xmm8
  13345. vmovdqu xmm0, OWORD PTR [rsi+160]
  13346. cmp r9d, 11
  13347. jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
  13348. vaesenc xmm10, xmm10, xmm0
  13349. vaesenc xmm10, xmm10, [rsi+176]
  13350. vmovdqu xmm0, OWORD PTR [rsi+192]
  13351. cmp r9d, 13
  13352. jl L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last
  13353. vaesenc xmm10, xmm10, xmm0
  13354. vaesenc xmm10, xmm10, [rsi+208]
  13355. vmovdqu xmm0, OWORD PTR [rsi+224]
  13356. L_AES_GCM_decrypt_avx2_aesenc_gfmul_sb_last:
  13357. vaesenclast xmm10, xmm10, xmm0
  13358. vpxor xmm6, xmm2, xmm1
  13359. vpxor xmm10, xmm10, xmm11
  13360. vmovdqu OWORD PTR [r8+rbx], xmm10
  13361. add ebx, 16
  13362. cmp ebx, r13d
  13363. jl L_AES_GCM_decrypt_avx2_last_block_start
  13364. L_AES_GCM_decrypt_avx2_last_block_done:
  13365. mov ecx, r10d
  13366. mov edx, r10d
  13367. and ecx, 15
  13368. jz L_AES_GCM_decrypt_avx2_done_dec
  13369. ; aesenc_last15_dec
  13370. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13371. vpxor xmm4, xmm4, [rsi]
  13372. vaesenc xmm4, xmm4, [rsi+16]
  13373. vaesenc xmm4, xmm4, [rsi+32]
  13374. vaesenc xmm4, xmm4, [rsi+48]
  13375. vaesenc xmm4, xmm4, [rsi+64]
  13376. vaesenc xmm4, xmm4, [rsi+80]
  13377. vaesenc xmm4, xmm4, [rsi+96]
  13378. vaesenc xmm4, xmm4, [rsi+112]
  13379. vaesenc xmm4, xmm4, [rsi+128]
  13380. vaesenc xmm4, xmm4, [rsi+144]
  13381. cmp r9d, 11
  13382. vmovdqu xmm1, OWORD PTR [rsi+160]
  13383. jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
  13384. vaesenc xmm4, xmm4, xmm1
  13385. vaesenc xmm4, xmm4, [rsi+176]
  13386. cmp r9d, 13
  13387. vmovdqu xmm1, OWORD PTR [rsi+192]
  13388. jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last
  13389. vaesenc xmm4, xmm4, xmm1
  13390. vaesenc xmm4, xmm4, [rsi+208]
  13391. vmovdqu xmm1, OWORD PTR [rsi+224]
  13392. L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_aesenc_avx_last:
  13393. vaesenclast xmm4, xmm4, xmm1
  13394. xor ecx, ecx
  13395. vpxor xmm0, xmm0, xmm0
  13396. vmovdqu OWORD PTR [rsp], xmm4
  13397. vmovdqu OWORD PTR [rsp+16], xmm0
  13398. L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop:
  13399. movzx r13d, BYTE PTR [rdi+rbx]
  13400. mov BYTE PTR [rsp+rcx+16], r13b
  13401. xor r13b, BYTE PTR [rsp+rcx]
  13402. mov BYTE PTR [r8+rbx], r13b
  13403. inc ebx
  13404. inc ecx
  13405. cmp ebx, edx
  13406. jl L_AES_GCM_decrypt_avx2_aesenc_last15_dec_avx_loop
  13407. vmovdqu xmm4, OWORD PTR [rsp+16]
  13408. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13409. vpxor xmm6, xmm6, xmm4
  13410. ; ghash_gfmul_red
  13411. vpclmulqdq xmm2, xmm6, xmm5, 16
  13412. vpclmulqdq xmm1, xmm6, xmm5, 1
  13413. vpclmulqdq xmm0, xmm6, xmm5, 0
  13414. vpxor xmm2, xmm2, xmm1
  13415. vpslldq xmm1, xmm2, 8
  13416. vpsrldq xmm2, xmm2, 8
  13417. vpxor xmm1, xmm1, xmm0
  13418. vpclmulqdq xmm6, xmm6, xmm5, 17
  13419. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13420. vpshufd xmm1, xmm1, 78
  13421. vpxor xmm1, xmm1, xmm0
  13422. vpclmulqdq xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13423. vpshufd xmm1, xmm1, 78
  13424. vpxor xmm6, xmm6, xmm2
  13425. vpxor xmm6, xmm6, xmm1
  13426. vpxor xmm6, xmm6, xmm0
  13427. L_AES_GCM_decrypt_avx2_done_dec:
  13428. ; calc_tag
  13429. shl r10, 3
  13430. shl r11, 3
  13431. vmovq xmm0, r10
  13432. vmovq xmm1, r11
  13433. vpunpcklqdq xmm0, xmm0, xmm1
  13434. vpxor xmm0, xmm0, xmm6
  13435. ; ghash_gfmul_red
  13436. vpclmulqdq xmm4, xmm0, xmm5, 16
  13437. vpclmulqdq xmm3, xmm0, xmm5, 1
  13438. vpclmulqdq xmm2, xmm0, xmm5, 0
  13439. vpxor xmm4, xmm4, xmm3
  13440. vpslldq xmm3, xmm4, 8
  13441. vpsrldq xmm4, xmm4, 8
  13442. vpxor xmm3, xmm3, xmm2
  13443. vpclmulqdq xmm0, xmm0, xmm5, 17
  13444. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13445. vpshufd xmm3, xmm3, 78
  13446. vpxor xmm3, xmm3, xmm2
  13447. vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
  13448. vpshufd xmm3, xmm3, 78
  13449. vpxor xmm0, xmm0, xmm4
  13450. vpxor xmm0, xmm0, xmm3
  13451. vpxor xmm0, xmm0, xmm2
  13452. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13453. vpxor xmm0, xmm0, xmm15
  13454. ; cmp_tag
  13455. cmp r15d, 16
  13456. je L_AES_GCM_decrypt_avx2_cmp_tag_16
  13457. xor rdx, rdx
  13458. xor rax, rax
  13459. vmovdqu OWORD PTR [rsp], xmm0
  13460. L_AES_GCM_decrypt_avx2_cmp_tag_loop:
  13461. movzx r13d, BYTE PTR [rsp+rdx]
  13462. xor r13b, BYTE PTR [r14+rdx]
  13463. or al, r13b
  13464. inc edx
  13465. cmp edx, r15d
  13466. jne L_AES_GCM_decrypt_avx2_cmp_tag_loop
  13467. cmp rax, 0
  13468. sete al
  13469. jmp L_AES_GCM_decrypt_avx2_cmp_tag_done
  13470. L_AES_GCM_decrypt_avx2_cmp_tag_16:
  13471. vmovdqu xmm1, OWORD PTR [r14]
  13472. vpcmpeqb xmm0, xmm0, xmm1
  13473. vpmovmskb rdx, xmm0
  13474. ; %%edx == 0xFFFF then return 1 else => return 0
  13475. xor eax, eax
  13476. cmp edx, 65535
  13477. sete al
  13478. L_AES_GCM_decrypt_avx2_cmp_tag_done:
  13479. mov DWORD PTR [rbp], eax
  13480. vzeroupper
  13481. add rsp, 168
  13482. pop rbp
  13483. pop rsi
  13484. pop r15
  13485. pop rbx
  13486. pop r14
  13487. pop r12
  13488. pop rdi
  13489. pop r13
  13490. ret
  13491. AES_GCM_decrypt_avx2 ENDP
  13492. _text ENDS
  13493. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_init_avx2 - derive the GCM hash key and counter blocks from an IV.
  ; ABI:  Microsoft x64.
  ; In:   rcx     = AES round keys (16 bytes each, read at offsets 0..224)
  ;       edx     = AES round count (< 11: 10 rounds, < 13: 12 rounds, else 14)
  ;       r8      = IV
  ;       r9d     = IV length in bytes (12 takes the dedicated fast path)
  ;       5th arg = out, 16 bytes: H = E_K(0) shuffled by L_avx2_aes_gcm_bswap_mask
  ;       6th arg = out, 16 bytes: counter block for the next encryption
  ;       7th arg = out, 16 bytes: encrypted initial counter block
  ; Stack: [rsp]     16-byte scratch used to zero-pad a partial IV block
  ;        [rsp+16]  saved xmm6
  ;        [rsp+32]  saved xmm7
  ;        xmm6/xmm7 are callee-saved under this ABI and are used as
  ;        temporaries below, so they are spilled on entry and reloaded on exit.
  ; Clobbers: rax, rcx, rdx, r8-r11, xmm0-xmm5, flags.
  ; NOTE(review): the 12-byte path loads 16 bytes from the IV pointer
  ;        (vpblendd with a memory operand) - confirm callers tolerate that.
  ; ---------------------------------------------------------------------
  13494. AES_GCM_init_avx2 PROC
  13495. push rbx
  13496. push rdi
  13497. push rsi
  13498. push r12
  13499. mov rdi, rcx
  13500. mov rsi, rdx
  13501. mov r10, r8
  13502. mov r11d, r9d
  ; Stack arguments: 4 pushes (32) + return address (8) + shadow space (32) = 72.
  13503. mov rax, QWORD PTR [rsp+72]
  13504. mov r8, QWORD PTR [rsp+80]
  13505. mov r9, QWORD PTR [rsp+88]
  ; 16 bytes of scratch plus two 16-byte save slots for xmm6/xmm7.
  13506. sub rsp, 48
  vmovdqu OWORD PTR [rsp+16], xmm6
  vmovdqu OWORD PTR [rsp+32], xmm7
  13507. vpxor xmm4, xmm4, xmm4
  13508. mov edx, r11d
  13509. cmp edx, 12
  13510. je L_AES_GCM_init_avx2_iv_12
  13511. ; Calculate values when IV is not 12 bytes
  13512. ; H = Encrypt X(=0)
  13513. vmovdqu xmm5, OWORD PTR [rdi]
  13514. vaesenc xmm5, xmm5, [rdi+16]
  13515. vaesenc xmm5, xmm5, [rdi+32]
  13516. vaesenc xmm5, xmm5, [rdi+48]
  13517. vaesenc xmm5, xmm5, [rdi+64]
  13518. vaesenc xmm5, xmm5, [rdi+80]
  13519. vaesenc xmm5, xmm5, [rdi+96]
  13520. vaesenc xmm5, xmm5, [rdi+112]
  13521. vaesenc xmm5, xmm5, [rdi+128]
  13522. vaesenc xmm5, xmm5, [rdi+144]
  13523. cmp esi, 11
  13524. vmovdqu xmm0, OWORD PTR [rdi+160]
  13525. jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last
  13526. vaesenc xmm5, xmm5, xmm0
  13527. vaesenc xmm5, xmm5, [rdi+176]
  13528. cmp esi, 13
  13529. vmovdqu xmm0, OWORD PTR [rdi+192]
  13530. jl L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last
  13531. vaesenc xmm5, xmm5, xmm0
  13532. vaesenc xmm5, xmm5, [rdi+208]
  13533. vmovdqu xmm0, OWORD PTR [rdi+224]
  13534. L_AES_GCM_init_avx2_calc_iv_1_aesenc_avx_last:
  13535. vaesenclast xmm5, xmm5, xmm0
  13536. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13537. ; Calc counter
  13538. ; Initialization vector
  ; mov does not alter flags, so the je below still tests edx == 0.
  13539. cmp edx, 0
  13540. mov rcx, 0
  13541. je L_AES_GCM_init_avx2_calc_iv_done
  13542. cmp edx, 16
  13543. jl L_AES_GCM_init_avx2_calc_iv_lt16
  ; edx = IV length rounded down to a multiple of 16; rcx = byte offset.
  13544. and edx, 4294967280
  13545. L_AES_GCM_init_avx2_calc_iv_16_loop:
  13546. vmovdqu xmm0, OWORD PTR [r10+rcx]
  13547. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13548. vpxor xmm4, xmm4, xmm0
  13549. ; ghash_gfmul_avx
  13550. vpclmulqdq xmm2, xmm5, xmm4, 16
  13551. vpclmulqdq xmm1, xmm5, xmm4, 1
  13552. vpclmulqdq xmm0, xmm5, xmm4, 0
  13553. vpclmulqdq xmm3, xmm5, xmm4, 17
  13554. vpxor xmm2, xmm2, xmm1
  13555. vpslldq xmm1, xmm2, 8
  13556. vpsrldq xmm2, xmm2, 8
  13557. vpxor xmm6, xmm0, xmm1
  13558. vpxor xmm4, xmm3, xmm2
  13559. ; ghash_mid
  13560. vpsrld xmm0, xmm6, 31
  13561. vpsrld xmm1, xmm4, 31
  13562. vpslld xmm6, xmm6, 1
  13563. vpslld xmm4, xmm4, 1
  13564. vpsrldq xmm2, xmm0, 12
  13565. vpslldq xmm0, xmm0, 4
  13566. vpslldq xmm1, xmm1, 4
  13567. vpor xmm4, xmm4, xmm2
  13568. vpor xmm6, xmm6, xmm0
  13569. vpor xmm4, xmm4, xmm1
  13570. ; ghash_red
  13571. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13572. vpclmulqdq xmm0, xmm6, xmm2, 16
  13573. vpshufd xmm1, xmm6, 78
  13574. vpxor xmm1, xmm1, xmm0
  13575. vpclmulqdq xmm0, xmm1, xmm2, 16
  13576. vpshufd xmm1, xmm1, 78
  13577. vpxor xmm1, xmm1, xmm0
  13578. vpxor xmm4, xmm4, xmm1
  13579. add ecx, 16
  13580. cmp ecx, edx
  13581. jl L_AES_GCM_init_avx2_calc_iv_16_loop
  ; Restore the full IV length; skip the tail handling if nothing is left.
  13582. mov edx, r11d
  13583. cmp ecx, edx
  13584. je L_AES_GCM_init_avx2_calc_iv_done
  13585. L_AES_GCM_init_avx2_calc_iv_lt16:
  ; Copy the remaining IV bytes into a zeroed 16-byte scratch block at [rsp].
  13586. vpxor xmm0, xmm0, xmm0
  13587. xor ebx, ebx
  13588. vmovdqu OWORD PTR [rsp], xmm0
  13589. L_AES_GCM_init_avx2_calc_iv_loop:
  13590. movzx r12d, BYTE PTR [r10+rcx]
  13591. mov BYTE PTR [rsp+rbx], r12b
  13592. inc ecx
  13593. inc ebx
  13594. cmp ecx, edx
  13595. jl L_AES_GCM_init_avx2_calc_iv_loop
  13596. vmovdqu xmm0, OWORD PTR [rsp]
  13597. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13598. vpxor xmm4, xmm4, xmm0
  13599. ; ghash_gfmul_avx
  13600. vpclmulqdq xmm2, xmm5, xmm4, 16
  13601. vpclmulqdq xmm1, xmm5, xmm4, 1
  13602. vpclmulqdq xmm0, xmm5, xmm4, 0
  13603. vpclmulqdq xmm3, xmm5, xmm4, 17
  13604. vpxor xmm2, xmm2, xmm1
  13605. vpslldq xmm1, xmm2, 8
  13606. vpsrldq xmm2, xmm2, 8
  13607. vpxor xmm6, xmm0, xmm1
  13608. vpxor xmm4, xmm3, xmm2
  13609. ; ghash_mid
  13610. vpsrld xmm0, xmm6, 31
  13611. vpsrld xmm1, xmm4, 31
  13612. vpslld xmm6, xmm6, 1
  13613. vpslld xmm4, xmm4, 1
  13614. vpsrldq xmm2, xmm0, 12
  13615. vpslldq xmm0, xmm0, 4
  13616. vpslldq xmm1, xmm1, 4
  13617. vpor xmm4, xmm4, xmm2
  13618. vpor xmm6, xmm6, xmm0
  13619. vpor xmm4, xmm4, xmm1
  13620. ; ghash_red
  13621. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13622. vpclmulqdq xmm0, xmm6, xmm2, 16
  13623. vpshufd xmm1, xmm6, 78
  13624. vpxor xmm1, xmm1, xmm0
  13625. vpclmulqdq xmm0, xmm1, xmm2, 16
  13626. vpshufd xmm1, xmm1, 78
  13627. vpxor xmm1, xmm1, xmm0
  13628. vpxor xmm4, xmm4, xmm1
  13629. L_AES_GCM_init_avx2_calc_iv_done:
  13630. ; Fold the IV length in bits (edx * 8) into the hash state
  13631. vpxor xmm0, xmm0, xmm0
  13632. shl edx, 3
  13633. vmovq xmm0, rdx
  13634. vpxor xmm4, xmm4, xmm0
  13635. ; ghash_gfmul_avx
  13636. vpclmulqdq xmm2, xmm5, xmm4, 16
  13637. vpclmulqdq xmm1, xmm5, xmm4, 1
  13638. vpclmulqdq xmm0, xmm5, xmm4, 0
  13639. vpclmulqdq xmm3, xmm5, xmm4, 17
  13640. vpxor xmm2, xmm2, xmm1
  13641. vpslldq xmm1, xmm2, 8
  13642. vpsrldq xmm2, xmm2, 8
  13643. vpxor xmm6, xmm0, xmm1
  13644. vpxor xmm4, xmm3, xmm2
  13645. ; ghash_mid
  13646. vpsrld xmm0, xmm6, 31
  13647. vpsrld xmm1, xmm4, 31
  13648. vpslld xmm6, xmm6, 1
  13649. vpslld xmm4, xmm4, 1
  13650. vpsrldq xmm2, xmm0, 12
  13651. vpslldq xmm0, xmm0, 4
  13652. vpslldq xmm1, xmm1, 4
  13653. vpor xmm4, xmm4, xmm2
  13654. vpor xmm6, xmm6, xmm0
  13655. vpor xmm4, xmm4, xmm1
  13656. ; ghash_red
  13657. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13658. vpclmulqdq xmm0, xmm6, xmm2, 16
  13659. vpshufd xmm1, xmm6, 78
  13660. vpxor xmm1, xmm1, xmm0
  13661. vpclmulqdq xmm0, xmm1, xmm2, 16
  13662. vpshufd xmm1, xmm1, 78
  13663. vpxor xmm1, xmm1, xmm0
  13664. vpxor xmm4, xmm4, xmm1
  13665. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13666. ; Encrypt counter
  13667. vmovdqu xmm7, OWORD PTR [rdi]
  13668. vpxor xmm7, xmm7, xmm4
  13669. vaesenc xmm7, xmm7, [rdi+16]
  13670. vaesenc xmm7, xmm7, [rdi+32]
  13671. vaesenc xmm7, xmm7, [rdi+48]
  13672. vaesenc xmm7, xmm7, [rdi+64]
  13673. vaesenc xmm7, xmm7, [rdi+80]
  13674. vaesenc xmm7, xmm7, [rdi+96]
  13675. vaesenc xmm7, xmm7, [rdi+112]
  13676. vaesenc xmm7, xmm7, [rdi+128]
  13677. vaesenc xmm7, xmm7, [rdi+144]
  13678. cmp esi, 11
  13679. vmovdqu xmm0, OWORD PTR [rdi+160]
  13680. jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last
  13681. vaesenc xmm7, xmm7, xmm0
  13682. vaesenc xmm7, xmm7, [rdi+176]
  13683. cmp esi, 13
  13684. vmovdqu xmm0, OWORD PTR [rdi+192]
  13685. jl L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last
  13686. vaesenc xmm7, xmm7, xmm0
  13687. vaesenc xmm7, xmm7, [rdi+208]
  13688. vmovdqu xmm0, OWORD PTR [rdi+224]
  13689. L_AES_GCM_init_avx2_calc_iv_2_aesenc_avx_last:
  13690. vaesenclast xmm7, xmm7, xmm0
  13691. jmp L_AES_GCM_init_avx2_iv_done
  13692. L_AES_GCM_init_avx2_iv_12:
  13693. ; # Calculate values when IV is 12 bytes
  13694. ; Set counter based on IV
  13695. vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_one
  13696. vmovdqu xmm5, OWORD PTR [rdi]
  ; Low three dwords come from the IV, the top dword from the constant.
  13697. vpblendd xmm4, xmm4, [r10], 7
  13698. ; H = Encrypt X(=0) and T = Encrypt counter
  13699. vmovdqu xmm6, OWORD PTR [rdi+16]
  13700. vpxor xmm7, xmm4, xmm5
  13701. vaesenc xmm5, xmm5, xmm6
  13702. vaesenc xmm7, xmm7, xmm6
  13703. vmovdqu xmm0, OWORD PTR [rdi+32]
  13704. vaesenc xmm5, xmm5, xmm0
  13705. vaesenc xmm7, xmm7, xmm0
  13706. vmovdqu xmm0, OWORD PTR [rdi+48]
  13707. vaesenc xmm5, xmm5, xmm0
  13708. vaesenc xmm7, xmm7, xmm0
  13709. vmovdqu xmm0, OWORD PTR [rdi+64]
  13710. vaesenc xmm5, xmm5, xmm0
  13711. vaesenc xmm7, xmm7, xmm0
  13712. vmovdqu xmm0, OWORD PTR [rdi+80]
  13713. vaesenc xmm5, xmm5, xmm0
  13714. vaesenc xmm7, xmm7, xmm0
  13715. vmovdqu xmm0, OWORD PTR [rdi+96]
  13716. vaesenc xmm5, xmm5, xmm0
  13717. vaesenc xmm7, xmm7, xmm0
  13718. vmovdqu xmm0, OWORD PTR [rdi+112]
  13719. vaesenc xmm5, xmm5, xmm0
  13720. vaesenc xmm7, xmm7, xmm0
  13721. vmovdqu xmm0, OWORD PTR [rdi+128]
  13722. vaesenc xmm5, xmm5, xmm0
  13723. vaesenc xmm7, xmm7, xmm0
  13724. vmovdqu xmm0, OWORD PTR [rdi+144]
  13725. vaesenc xmm5, xmm5, xmm0
  13726. vaesenc xmm7, xmm7, xmm0
  13727. cmp esi, 11
  13728. vmovdqu xmm0, OWORD PTR [rdi+160]
  13729. jl L_AES_GCM_init_avx2_calc_iv_12_last
  13730. vaesenc xmm5, xmm5, xmm0
  13731. vaesenc xmm7, xmm7, xmm0
  13732. vmovdqu xmm0, OWORD PTR [rdi+176]
  13733. vaesenc xmm5, xmm5, xmm0
  13734. vaesenc xmm7, xmm7, xmm0
  13735. cmp esi, 13
  13736. vmovdqu xmm0, OWORD PTR [rdi+192]
  13737. jl L_AES_GCM_init_avx2_calc_iv_12_last
  13738. vaesenc xmm5, xmm5, xmm0
  13739. vaesenc xmm7, xmm7, xmm0
  13740. vmovdqu xmm0, OWORD PTR [rdi+208]
  13741. vaesenc xmm5, xmm5, xmm0
  13742. vaesenc xmm7, xmm7, xmm0
  13743. vmovdqu xmm0, OWORD PTR [rdi+224]
  13744. L_AES_GCM_init_avx2_calc_iv_12_last:
  13745. vaesenclast xmm5, xmm5, xmm0
  13746. vaesenclast xmm7, xmm7, xmm0
  13747. vpshufb xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13748. L_AES_GCM_init_avx2_iv_done:
  ; Write results: [r9] = encrypted initial counter, [rax] = H,
  ; [r8] = counter shuffled by bswap_epi64 with L_avx2_aes_gcm_one added.
  13749. vmovdqu OWORD PTR [r9], xmm7
  13750. vpshufb xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13751. vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
  13752. vmovdqu OWORD PTR [rax], xmm5
  13753. vmovdqu OWORD PTR [r8], xmm4
  13754. vzeroupper
  ; Reload the caller's xmm6/xmm7 before releasing the frame.
  vmovdqu xmm6, OWORD PTR [rsp+16]
  vmovdqu xmm7, OWORD PTR [rsp+32]
  13755. add rsp, 48
  13756. pop r12
  13757. pop rsi
  13758. pop rdi
  13759. pop rbx
  13760. ret
  13761. AES_GCM_init_avx2 ENDP
  13762. _text ENDS
  13763. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_aad_update_avx2 - fold whole 16-byte blocks into a GHASH state.
  ; ABI:  Microsoft x64, leaf (no stack use).
  ; In:   rcx = input data
  ;       edx = input length in bytes
  ;       r8  = 16-byte hash state, read on entry and written on exit
  ;       r9  = 16-byte hash key H
  ; The loop is bottom-tested with a signed compare: one block is always
  ; consumed, and it advances 16 bytes at a time while offset < edx.
  ; Only xmm0-xmm5 are used; the low half of the carry-less product lives
  ; in xmm3 (free once the high half has been combined) so that no
  ; callee-saved XMM register is touched.
  ; Clobbers: rax, rcx, xmm0-xmm5, flags.
  ; ---------------------------------------------------------------------
  13764. AES_GCM_aad_update_avx2 PROC
  13765. mov rax, rcx
  13766. vmovdqu xmm4, OWORD PTR [r8]
  13767. vmovdqu xmm5, OWORD PTR [r9]
  13768. xor ecx, ecx
  13769. L_AES_GCM_aad_update_avx2_16_loop:
  13770. vmovdqu xmm0, OWORD PTR [rax+rcx]
  13771. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13772. vpxor xmm4, xmm4, xmm0
  13773. ; ghash_gfmul_avx
  13774. vpclmulqdq xmm2, xmm5, xmm4, 16
  13775. vpclmulqdq xmm1, xmm5, xmm4, 1
  13776. vpclmulqdq xmm0, xmm5, xmm4, 0
  13777. vpclmulqdq xmm3, xmm5, xmm4, 17
  13778. vpxor xmm2, xmm2, xmm1
  13779. vpslldq xmm1, xmm2, 8
  13780. vpsrldq xmm2, xmm2, 8
  ; High half first: this is the last read of xmm3, freeing it for the low half.
  13781. vpxor xmm4, xmm3, xmm2
  13782. vpxor xmm3, xmm0, xmm1
  13783. ; ghash_mid
  13784. vpsrld xmm0, xmm3, 31
  13785. vpsrld xmm1, xmm4, 31
  13786. vpslld xmm3, xmm3, 1
  13787. vpslld xmm4, xmm4, 1
  13788. vpsrldq xmm2, xmm0, 12
  13789. vpslldq xmm0, xmm0, 4
  13790. vpslldq xmm1, xmm1, 4
  13791. vpor xmm4, xmm4, xmm2
  13792. vpor xmm3, xmm3, xmm0
  13793. vpor xmm4, xmm4, xmm1
  13794. ; ghash_red
  13795. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13796. vpclmulqdq xmm0, xmm3, xmm2, 16
  13797. vpshufd xmm1, xmm3, 78
  13798. vpxor xmm1, xmm1, xmm0
  13799. vpclmulqdq xmm0, xmm1, xmm2, 16
  13800. vpshufd xmm1, xmm1, 78
  13801. vpxor xmm1, xmm1, xmm0
  13802. vpxor xmm4, xmm4, xmm1
  13803. add ecx, 16
  13804. cmp ecx, edx
  13805. jl L_AES_GCM_aad_update_avx2_16_loop
  13806. vmovdqu OWORD PTR [r8], xmm4
  13807. vzeroupper
  13808. ret
  13809. AES_GCM_aad_update_avx2 ENDP
  13810. _text ENDS
  13811. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_encrypt_block_avx2 - encrypt one 16-byte block in counter mode.
  ; ABI:  Microsoft x64, leaf (no stack use).
  ; In:   rcx     = AES round keys (16 bytes each, read at offsets 0..224)
  ;       edx     = AES round count (< 11: 10 rounds, < 13: 12 rounds, else 14)
  ;       r8      = output, 16 bytes
  ;       r9      = input, 16 bytes
  ;       5th arg = 16-byte counter; read, has L_avx2_aes_gcm_one added,
  ;                 and is written back
  ; The routine never touches the stack, so no frame is reserved; rsp is
  ; left untouched and the 5th argument is read at its entry offset.
  ; Clobbers: rax, r10, r11, xmm0-xmm3, flags.
  ; ---------------------------------------------------------------------
  13812. AES_GCM_encrypt_block_avx2 PROC
  13813. mov r10, r8
  13814. mov r11, r9
  ; Return address (8) + shadow space (32) = 40.
  13815. mov rax, QWORD PTR [rsp+40]
  13817. vmovdqu xmm3, OWORD PTR [rax]
  13818. ; aesenc_block
  13819. vmovdqu xmm1, xmm3
  13820. vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
  13821. vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
  13822. vpxor xmm0, xmm0, [rcx]
  13823. vmovdqu xmm2, OWORD PTR [rcx+16]
  13824. vaesenc xmm0, xmm0, xmm2
  13825. vmovdqu xmm2, OWORD PTR [rcx+32]
  13826. vaesenc xmm0, xmm0, xmm2
  13827. vmovdqu xmm2, OWORD PTR [rcx+48]
  13828. vaesenc xmm0, xmm0, xmm2
  13829. vmovdqu xmm2, OWORD PTR [rcx+64]
  13830. vaesenc xmm0, xmm0, xmm2
  13831. vmovdqu xmm2, OWORD PTR [rcx+80]
  13832. vaesenc xmm0, xmm0, xmm2
  13833. vmovdqu xmm2, OWORD PTR [rcx+96]
  13834. vaesenc xmm0, xmm0, xmm2
  13835. vmovdqu xmm2, OWORD PTR [rcx+112]
  13836. vaesenc xmm0, xmm0, xmm2
  13837. vmovdqu xmm2, OWORD PTR [rcx+128]
  13838. vaesenc xmm0, xmm0, xmm2
  13839. vmovdqu xmm2, OWORD PTR [rcx+144]
  13840. vaesenc xmm0, xmm0, xmm2
  ; Park the incremented counter in xmm3; xmm1 is reused for round keys.
  13841. vmovdqu xmm3, xmm1
  13842. cmp edx, 11
  13843. vmovdqu xmm1, OWORD PTR [rcx+160]
  13844. jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last
  13845. vaesenc xmm0, xmm0, xmm1
  13846. vmovdqu xmm2, OWORD PTR [rcx+176]
  13847. vaesenc xmm0, xmm0, xmm2
  13848. cmp edx, 13
  13849. vmovdqu xmm1, OWORD PTR [rcx+192]
  13850. jl L_AES_GCM_encrypt_block_avx2_aesenc_block_last
  13851. vaesenc xmm0, xmm0, xmm1
  13852. vmovdqu xmm2, OWORD PTR [rcx+208]
  13853. vaesenc xmm0, xmm0, xmm2
  13854. vmovdqu xmm1, OWORD PTR [rcx+224]
  13855. L_AES_GCM_encrypt_block_avx2_aesenc_block_last:
  13856. vaesenclast xmm0, xmm0, xmm1
  ; XOR the keystream block with the input and store; then store the counter.
  13857. vmovdqu xmm1, OWORD PTR [r11]
  13858. vpxor xmm0, xmm0, xmm1
  13859. vmovdqu OWORD PTR [r10], xmm0
  13860. vmovdqu OWORD PTR [rax], xmm3
  13861. vzeroupper
  13863. ret
  13864. AES_GCM_encrypt_block_avx2 ENDP
  13865. _text ENDS
  13866. _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------
  ; AES_GCM_ghash_block_avx2 - fold one 16-byte block into a GHASH state.
  ; ABI:  Microsoft x64, leaf (no stack use).
  ; In:   rcx = 16-byte input block
  ;       rdx = 16-byte hash state, read on entry and written on exit
  ;       r8  = 16-byte hash key H
  ; Only xmm0-xmm5 are used; the low half of the carry-less product lives
  ; in xmm3 (free once the high half has been combined) so that no
  ; callee-saved XMM register is touched.
  ; Clobbers: xmm0-xmm5.
  ; ---------------------------------------------------------------------
  13867. AES_GCM_ghash_block_avx2 PROC
  13868. vmovdqu xmm4, OWORD PTR [rdx]
  13869. vmovdqu xmm5, OWORD PTR [r8]
  13870. vmovdqu xmm0, OWORD PTR [rcx]
  13871. vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
  13872. vpxor xmm4, xmm4, xmm0
  13873. ; ghash_gfmul_avx
  13874. vpclmulqdq xmm2, xmm5, xmm4, 16
  13875. vpclmulqdq xmm1, xmm5, xmm4, 1
  13876. vpclmulqdq xmm0, xmm5, xmm4, 0
  13877. vpclmulqdq xmm3, xmm5, xmm4, 17
  13878. vpxor xmm2, xmm2, xmm1
  13879. vpslldq xmm1, xmm2, 8
  13880. vpsrldq xmm2, xmm2, 8
  ; High half first: this is the last read of xmm3, freeing it for the low half.
  13881. vpxor xmm4, xmm3, xmm2
  13882. vpxor xmm3, xmm0, xmm1
  13883. ; ghash_mid
  13884. vpsrld xmm0, xmm3, 31
  13885. vpsrld xmm1, xmm4, 31
  13886. vpslld xmm3, xmm3, 1
  13887. vpslld xmm4, xmm4, 1
  13888. vpsrldq xmm2, xmm0, 12
  13889. vpslldq xmm0, xmm0, 4
  13890. vpslldq xmm1, xmm1, 4
  13891. vpor xmm4, xmm4, xmm2
  13892. vpor xmm3, xmm3, xmm0
  13893. vpor xmm4, xmm4, xmm1
  13894. ; ghash_red
  13895. vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
  13896. vpclmulqdq xmm0, xmm3, xmm2, 16
  13897. vpshufd xmm1, xmm3, 78
  13898. vpxor xmm1, xmm1, xmm0
  13899. vpclmulqdq xmm0, xmm1, xmm2, 16
  13900. vpshufd xmm1, xmm1, 78
  13901. vpxor xmm1, xmm1, xmm0
  13902. vpxor xmm4, xmm4, xmm1
  13903. vmovdqu OWORD PTR [rdx], xmm4
  13904. vzeroupper
  13905. ret
  13906. AES_GCM_ghash_block_avx2 ENDP
  13907. _text ENDS
  _text SEGMENT READONLY PARA
  ; ---------------------------------------------------------------------------
  ; AES_GCM_encrypt_update_avx2
  ;
  ; Encrypts whole 16-byte blocks in counter mode and folds the resulting
  ; ciphertext into the running hash value. Only (byte count & ~15) bytes are
  ; processed; a trailing partial block is left untouched by this routine.
  ;
  ; ABI:   Microsoft x64 (MASM)
  ; In:    rcx        -> AES round keys, 16 bytes each (kept in rax)
  ;        edx        =  value compared against 11 and 13 to select extra
  ;                      rounds (kept in r8d)
  ;                      NOTE(review): presumably the round count 10/12/14
  ;        r8         -> output buffer (kept in r10)
  ;        r9         -> input buffer (kept in r11)
  ;        [rsp+40]   =  byte count, 32-bit (kept in r9d); compared signed
  ;        [rsp+48]   -> 16-byte hash state, read and written back (r12)
  ;        [rsp+56]   -> 16-byte hash multiplier, read only (r13)
  ;        [rsp+64]   -> 16-byte counter block, read and written back (r14)
  ;        (stack offsets are relative to rsp on entry; after the five pushes
  ;         below they become +80 .. +104)
  ; Out:   [r12] = updated hash state, [r14] = updated counter
  ; Clobb: rax, rcx, rdx, r8-r11, xmm0-xmm5, flags. Nonvolatile registers
  ;        r12-r15, rdi and xmm6-xmm15 are saved and restored.
  ;
  ; Frame layout after "sub rsp, 312":
  ;        [rsp+0   .. rsp+127]  eight 16-byte powers of the multiplier
  ;        [rsp+128 .. rsp+143]  running counter block
  ;        [rsp+144 .. rsp+151]  unused
  ;        [rsp+152 .. rsp+311]  saved xmm6-xmm15 (nonvolatile on Win64)
  ;
  ; Loop state: edi = bytes processed so far; r15d = byte count rounded down
  ; to a multiple of 128, later to a multiple of 16.
  ; ---------------------------------------------------------------------------
  AES_GCM_encrypt_update_avx2 PROC
      push r12
      push r13
      push r14
      push r15
      push rdi
      mov rax, rcx
      mov r10, r8
      mov r8d, edx
      mov r11, r9
      ; 5 pushes (40) + return address (8) + shadow space (32) = 80
      mov r9d, DWORD PTR [rsp+80]
      mov r12, QWORD PTR [rsp+88]
      mov r13, QWORD PTR [rsp+96]
      mov r14, QWORD PTR [rsp+104]
      sub rsp, 312
      ; Preserve the nonvolatile XMM registers; all sixteen are used below.
      vmovdqu OWORD PTR [rsp+152], xmm6
      vmovdqu OWORD PTR [rsp+168], xmm7
      vmovdqu OWORD PTR [rsp+184], xmm8
      vmovdqu OWORD PTR [rsp+200], xmm9
      vmovdqu OWORD PTR [rsp+216], xmm10
      vmovdqu OWORD PTR [rsp+232], xmm11
      vmovdqu OWORD PTR [rsp+248], xmm12
      vmovdqu OWORD PTR [rsp+264], xmm13
      vmovdqu OWORD PTR [rsp+280], xmm14
      vmovdqu OWORD PTR [rsp+296], xmm15
      vmovdqu xmm6, OWORD PTR [r12]                ; xmm6 = hash state
      vmovdqu xmm5, OWORD PTR [r13]                ; xmm5 = multiplier
      vmovdqu xmm4, OWORD PTR [r14]                ; xmm4 = counter block
      ; Shift the multiplier left by one bit across 128 bits and, when its top
      ; bit was set, XOR in the mod2_128 constant.
      vpsrlq xmm1, xmm5, 63
      vpsllq xmm0, xmm5, 1
      vpslldq xmm1, xmm1, 8
      vpor xmm0, xmm0, xmm1
      vpshufd xmm5, xmm5, 255                      ; broadcast the top dword
      vpsrad xmm5, xmm5, 31                        ; all-ones mask if its sign bit is set
      vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
      vpxor xmm5, xmm5, xmm0
      xor edi, edi
      cmp r9d, 128
      mov r15d, r9d
      jl L_AES_GCM_encrypt_update_avx2_done_128
      and r15d, 4294967168                         ; r15d = byte count & ~127
      vmovdqu OWORD PTR [rsp+128], xmm4
      vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
      ; H ^ 1 and H ^ 2
      vpclmulqdq xmm9, xmm5, xmm5, 0
      vpclmulqdq xmm10, xmm5, xmm5, 17
      vpclmulqdq xmm8, xmm9, xmm3, 16
      vpshufd xmm9, xmm9, 78
      vpxor xmm9, xmm9, xmm8
      vpclmulqdq xmm8, xmm9, xmm3, 16
      vpshufd xmm9, xmm9, 78
      vpxor xmm9, xmm9, xmm8
      vpxor xmm0, xmm10, xmm9
      vmovdqu OWORD PTR [rsp], xmm5
      vmovdqu OWORD PTR [rsp+16], xmm0
      ; H ^ 3 and H ^ 4
      vpclmulqdq xmm11, xmm0, xmm5, 16
      vpclmulqdq xmm10, xmm0, xmm5, 1
      vpclmulqdq xmm9, xmm0, xmm5, 0
      vpclmulqdq xmm12, xmm0, xmm5, 17
      vpclmulqdq xmm13, xmm0, xmm0, 0
      vpclmulqdq xmm14, xmm0, xmm0, 17
      vpxor xmm11, xmm11, xmm10
      vpslldq xmm10, xmm11, 8
      vpsrldq xmm11, xmm11, 8
      vpxor xmm10, xmm10, xmm9
      vpclmulqdq xmm8, xmm13, xmm3, 16
      vpclmulqdq xmm9, xmm10, xmm3, 16
      vpshufd xmm10, xmm10, 78
      vpshufd xmm13, xmm13, 78
      vpxor xmm10, xmm10, xmm9
      vpxor xmm13, xmm13, xmm8
      vpclmulqdq xmm9, xmm10, xmm3, 16
      vpclmulqdq xmm8, xmm13, xmm3, 16
      vpshufd xmm10, xmm10, 78
      vpshufd xmm13, xmm13, 78
      vpxor xmm12, xmm12, xmm11
      vpxor xmm13, xmm13, xmm8
      vpxor xmm10, xmm10, xmm12
      vpxor xmm2, xmm13, xmm14
      vpxor xmm1, xmm10, xmm9
      vmovdqu OWORD PTR [rsp+32], xmm1
      vmovdqu OWORD PTR [rsp+48], xmm2
      ; H ^ 5 and H ^ 6
      vpclmulqdq xmm11, xmm1, xmm0, 16
      vpclmulqdq xmm10, xmm1, xmm0, 1
      vpclmulqdq xmm9, xmm1, xmm0, 0
      vpclmulqdq xmm12, xmm1, xmm0, 17
      vpclmulqdq xmm13, xmm1, xmm1, 0
      vpclmulqdq xmm14, xmm1, xmm1, 17
      vpxor xmm11, xmm11, xmm10
      vpslldq xmm10, xmm11, 8
      vpsrldq xmm11, xmm11, 8
      vpxor xmm10, xmm10, xmm9
      vpclmulqdq xmm8, xmm13, xmm3, 16
      vpclmulqdq xmm9, xmm10, xmm3, 16
      vpshufd xmm10, xmm10, 78
      vpshufd xmm13, xmm13, 78
      vpxor xmm10, xmm10, xmm9
      vpxor xmm13, xmm13, xmm8
      vpclmulqdq xmm9, xmm10, xmm3, 16
      vpclmulqdq xmm8, xmm13, xmm3, 16
      vpshufd xmm10, xmm10, 78
      vpshufd xmm13, xmm13, 78
      vpxor xmm12, xmm12, xmm11
      vpxor xmm13, xmm13, xmm8
      vpxor xmm10, xmm10, xmm12
      vpxor xmm0, xmm13, xmm14
      vpxor xmm7, xmm10, xmm9
      vmovdqu OWORD PTR [rsp+64], xmm7
      vmovdqu OWORD PTR [rsp+80], xmm0
      ; H ^ 7 and H ^ 8
      vpclmulqdq xmm11, xmm2, xmm1, 16
      vpclmulqdq xmm10, xmm2, xmm1, 1
      vpclmulqdq xmm9, xmm2, xmm1, 0
      vpclmulqdq xmm12, xmm2, xmm1, 17
      vpclmulqdq xmm13, xmm2, xmm2, 0
      vpclmulqdq xmm14, xmm2, xmm2, 17
      vpxor xmm11, xmm11, xmm10
      vpslldq xmm10, xmm11, 8
      vpsrldq xmm11, xmm11, 8
      vpxor xmm10, xmm10, xmm9
      vpclmulqdq xmm8, xmm13, xmm3, 16
      vpclmulqdq xmm9, xmm10, xmm3, 16
      vpshufd xmm10, xmm10, 78
      vpshufd xmm13, xmm13, 78
      vpxor xmm10, xmm10, xmm9
      vpxor xmm13, xmm13, xmm8
      vpclmulqdq xmm9, xmm10, xmm3, 16
      vpclmulqdq xmm8, xmm13, xmm3, 16
      vpshufd xmm10, xmm10, 78
      vpshufd xmm13, xmm13, 78
      vpxor xmm12, xmm12, xmm11
      vpxor xmm13, xmm13, xmm8
      vpxor xmm10, xmm10, xmm12
      vpxor xmm0, xmm13, xmm14
      vpxor xmm7, xmm10, xmm9
      vmovdqu OWORD PTR [rsp+96], xmm7
      vmovdqu OWORD PTR [rsp+112], xmm0
      ; First 128 bytes of input
      ; aesenc_128
      ; aesenc_ctr: build eight consecutive counter blocks in xmm8-xmm15
      vmovdqu xmm0, OWORD PTR [rsp+128]
      vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
      vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
      vpshufb xmm8, xmm0, xmm1
      vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
      vpshufb xmm9, xmm9, xmm1
      vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
      vpshufb xmm10, xmm10, xmm1
      vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
      vpshufb xmm11, xmm11, xmm1
      vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
      vpshufb xmm12, xmm12, xmm1
      vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
      vpshufb xmm13, xmm13, xmm1
      vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
      vpshufb xmm14, xmm14, xmm1
      vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
      vpshufb xmm15, xmm15, xmm1
      ; aesenc_xor: initial round-key XOR, then nine rounds on all eight blocks
      vmovdqu xmm7, OWORD PTR [rax]
      vmovdqu OWORD PTR [rsp+128], xmm0            ; counter advanced by eight
      vpxor xmm8, xmm8, xmm7
      vpxor xmm9, xmm9, xmm7
      vpxor xmm10, xmm10, xmm7
      vpxor xmm11, xmm11, xmm7
      vpxor xmm12, xmm12, xmm7
      vpxor xmm13, xmm13, xmm7
      vpxor xmm14, xmm14, xmm7
      vpxor xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+16]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+32]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+48]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+64]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+80]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+96]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+112]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+128]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+144]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      ; Below 11: the key at +160 is the final round key.
      cmp r8d, 11
      vmovdqu xmm7, OWORD PTR [rax+160]
      jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+176]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      ; Below 13: the key at +192 is the final round key.
      cmp r8d, 13
      vmovdqu xmm7, OWORD PTR [rax+192]
      jl L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+208]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+224]
  L_AES_GCM_encrypt_update_avx2_aesenc_128_enc_done:
      ; aesenc_last: final round, XOR with input, store 128 bytes of output
      vaesenclast xmm8, xmm8, xmm7
      vaesenclast xmm9, xmm9, xmm7
      vaesenclast xmm10, xmm10, xmm7
      vaesenclast xmm11, xmm11, xmm7
      vmovdqu xmm0, OWORD PTR [r11]
      vmovdqu xmm1, OWORD PTR [r11+16]
      vmovdqu xmm2, OWORD PTR [r11+32]
      vmovdqu xmm3, OWORD PTR [r11+48]
      vpxor xmm8, xmm8, xmm0
      vpxor xmm9, xmm9, xmm1
      vpxor xmm10, xmm10, xmm2
      vpxor xmm11, xmm11, xmm3
      vmovdqu OWORD PTR [r10], xmm8
      vmovdqu OWORD PTR [r10+16], xmm9
      vmovdqu OWORD PTR [r10+32], xmm10
      vmovdqu OWORD PTR [r10+48], xmm11
      vaesenclast xmm12, xmm12, xmm7
      vaesenclast xmm13, xmm13, xmm7
      vaesenclast xmm14, xmm14, xmm7
      vaesenclast xmm15, xmm15, xmm7
      vmovdqu xmm0, OWORD PTR [r11+64]
      vmovdqu xmm1, OWORD PTR [r11+80]
      vmovdqu xmm2, OWORD PTR [r11+96]
      vmovdqu xmm3, OWORD PTR [r11+112]
      vpxor xmm12, xmm12, xmm0
      vpxor xmm13, xmm13, xmm1
      vpxor xmm14, xmm14, xmm2
      vpxor xmm15, xmm15, xmm3
      vmovdqu OWORD PTR [r10+64], xmm12
      vmovdqu OWORD PTR [r10+80], xmm13
      vmovdqu OWORD PTR [r10+96], xmm14
      vmovdqu OWORD PTR [r10+112], xmm15
      cmp r15d, 128
      mov edi, 128
      jle L_AES_GCM_encrypt_update_avx2_end_128
      ; More 128 bytes of input
  L_AES_GCM_encrypt_update_avx2_ghash_128:
      ; aesenc_128_ghash: encrypt the next eight blocks while hashing the
      ; previous 128 bytes of output (read back from [rdx-128 .. rdx-1])
      lea rcx, QWORD PTR [r11+rdi]                 ; rcx = in + processed
      lea rdx, QWORD PTR [r10+rdi]                 ; rdx = out + processed
      ; aesenc_ctr
      vmovdqu xmm0, OWORD PTR [rsp+128]
      vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
      vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
      vpshufb xmm8, xmm0, xmm1
      vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
      vpshufb xmm9, xmm9, xmm1
      vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
      vpshufb xmm10, xmm10, xmm1
      vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
      vpshufb xmm11, xmm11, xmm1
      vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
      vpshufb xmm12, xmm12, xmm1
      vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
      vpshufb xmm13, xmm13, xmm1
      vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
      vpshufb xmm14, xmm14, xmm1
      vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
      vpshufb xmm15, xmm15, xmm1
      ; aesenc_xor
      vmovdqu xmm7, OWORD PTR [rax]
      vmovdqu OWORD PTR [rsp+128], xmm0
      vpxor xmm8, xmm8, xmm7
      vpxor xmm9, xmm9, xmm7
      vpxor xmm10, xmm10, xmm7
      vpxor xmm11, xmm11, xmm7
      vpxor xmm12, xmm12, xmm7
      vpxor xmm13, xmm13, xmm7
      vpxor xmm14, xmm14, xmm7
      vpxor xmm15, xmm15, xmm7
      ; aesenc_pclmul_1: oldest block, XORed with the hash state, times [rsp+112]
      vmovdqu xmm1, OWORD PTR [rdx+-128]
      vmovdqu xmm0, OWORD PTR [rax+16]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vmovdqu xmm2, OWORD PTR [rsp+112]
      vpxor xmm1, xmm1, xmm6
      vpclmulqdq xmm5, xmm1, xmm2, 16
      vpclmulqdq xmm3, xmm1, xmm2, 1
      vpclmulqdq xmm6, xmm1, xmm2, 0
      vpclmulqdq xmm7, xmm1, xmm2, 17
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_2
      vmovdqu xmm1, OWORD PTR [rdx+-112]
      vmovdqu xmm0, OWORD PTR [rsp+96]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm3
      vpclmulqdq xmm2, xmm1, xmm0, 16
      vpclmulqdq xmm3, xmm1, xmm0, 1
      vpclmulqdq xmm4, xmm1, xmm0, 0
      vpclmulqdq xmm1, xmm1, xmm0, 17
      vmovdqu xmm0, OWORD PTR [rax+32]
      vpxor xmm7, xmm7, xmm1
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_n
      vmovdqu xmm1, OWORD PTR [rdx+-96]
      vmovdqu xmm0, OWORD PTR [rsp+80]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm2
      vpclmulqdq xmm2, xmm1, xmm0, 16
      vpxor xmm5, xmm5, xmm3
      vpclmulqdq xmm3, xmm1, xmm0, 1
      vpxor xmm6, xmm6, xmm4
      vpclmulqdq xmm4, xmm1, xmm0, 0
      vpclmulqdq xmm1, xmm1, xmm0, 17
      vmovdqu xmm0, OWORD PTR [rax+48]
      vpxor xmm7, xmm7, xmm1
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_n
      vmovdqu xmm1, OWORD PTR [rdx+-80]
      vmovdqu xmm0, OWORD PTR [rsp+64]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm2
      vpclmulqdq xmm2, xmm1, xmm0, 16
      vpxor xmm5, xmm5, xmm3
      vpclmulqdq xmm3, xmm1, xmm0, 1
      vpxor xmm6, xmm6, xmm4
      vpclmulqdq xmm4, xmm1, xmm0, 0
      vpclmulqdq xmm1, xmm1, xmm0, 17
      vmovdqu xmm0, OWORD PTR [rax+64]
      vpxor xmm7, xmm7, xmm1
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_n
      vmovdqu xmm1, OWORD PTR [rdx+-64]
      vmovdqu xmm0, OWORD PTR [rsp+48]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm2
      vpclmulqdq xmm2, xmm1, xmm0, 16
      vpxor xmm5, xmm5, xmm3
      vpclmulqdq xmm3, xmm1, xmm0, 1
      vpxor xmm6, xmm6, xmm4
      vpclmulqdq xmm4, xmm1, xmm0, 0
      vpclmulqdq xmm1, xmm1, xmm0, 17
      vmovdqu xmm0, OWORD PTR [rax+80]
      vpxor xmm7, xmm7, xmm1
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_n
      vmovdqu xmm1, OWORD PTR [rdx+-48]
      vmovdqu xmm0, OWORD PTR [rsp+32]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm2
      vpclmulqdq xmm2, xmm1, xmm0, 16
      vpxor xmm5, xmm5, xmm3
      vpclmulqdq xmm3, xmm1, xmm0, 1
      vpxor xmm6, xmm6, xmm4
      vpclmulqdq xmm4, xmm1, xmm0, 0
      vpclmulqdq xmm1, xmm1, xmm0, 17
      vmovdqu xmm0, OWORD PTR [rax+96]
      vpxor xmm7, xmm7, xmm1
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_n
      vmovdqu xmm1, OWORD PTR [rdx+-32]
      vmovdqu xmm0, OWORD PTR [rsp+16]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm2
      vpclmulqdq xmm2, xmm1, xmm0, 16
      vpxor xmm5, xmm5, xmm3
      vpclmulqdq xmm3, xmm1, xmm0, 1
      vpxor xmm6, xmm6, xmm4
      vpclmulqdq xmm4, xmm1, xmm0, 0
      vpclmulqdq xmm1, xmm1, xmm0, 17
      vmovdqu xmm0, OWORD PTR [rax+112]
      vpxor xmm7, xmm7, xmm1
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_n
      vmovdqu xmm1, OWORD PTR [rdx+-16]
      vmovdqu xmm0, OWORD PTR [rsp]
      vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm5, xmm5, xmm2
      vpclmulqdq xmm2, xmm1, xmm0, 16
      vpxor xmm5, xmm5, xmm3
      vpclmulqdq xmm3, xmm1, xmm0, 1
      vpxor xmm6, xmm6, xmm4
      vpclmulqdq xmm4, xmm1, xmm0, 0
      vpclmulqdq xmm1, xmm1, xmm0, 17
      vmovdqu xmm0, OWORD PTR [rax+128]
      vpxor xmm7, xmm7, xmm1
      vaesenc xmm8, xmm8, xmm0
      vaesenc xmm9, xmm9, xmm0
      vaesenc xmm10, xmm10, xmm0
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm12, xmm12, xmm0
      vaesenc xmm13, xmm13, xmm0
      vaesenc xmm14, xmm14, xmm0
      vaesenc xmm15, xmm15, xmm0
      ; aesenc_pclmul_l: combine partial products and reduce into xmm6,
      ; interleaved with the AES round that uses the key at +144
      vpxor xmm5, xmm5, xmm2
      vpxor xmm6, xmm6, xmm4
      vpxor xmm5, xmm5, xmm3
      vpslldq xmm1, xmm5, 8
      vpsrldq xmm5, xmm5, 8
      vmovdqu xmm4, OWORD PTR [rax+144]
      vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
      vaesenc xmm8, xmm8, xmm4
      vpxor xmm6, xmm6, xmm1
      vpxor xmm7, xmm7, xmm5
      vpclmulqdq xmm3, xmm6, xmm0, 16
      vaesenc xmm9, xmm9, xmm4
      vaesenc xmm10, xmm10, xmm4
      vaesenc xmm11, xmm11, xmm4
      vpshufd xmm6, xmm6, 78
      vpxor xmm6, xmm6, xmm3
      vpclmulqdq xmm3, xmm6, xmm0, 16
      vaesenc xmm12, xmm12, xmm4
      vaesenc xmm13, xmm13, xmm4
      vaesenc xmm14, xmm14, xmm4
      vpshufd xmm6, xmm6, 78
      vpxor xmm6, xmm6, xmm3
      vpxor xmm6, xmm6, xmm7                       ; xmm6 = updated hash state
      vaesenc xmm15, xmm15, xmm4
      cmp r8d, 11
      vmovdqu xmm7, OWORD PTR [rax+160]
      jl L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+176]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      cmp r8d, 13
      vmovdqu xmm7, OWORD PTR [rax+192]
      jl L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+208]
      vaesenc xmm8, xmm8, xmm7
      vaesenc xmm9, xmm9, xmm7
      vaesenc xmm10, xmm10, xmm7
      vaesenc xmm11, xmm11, xmm7
      vaesenc xmm12, xmm12, xmm7
      vaesenc xmm13, xmm13, xmm7
      vaesenc xmm14, xmm14, xmm7
      vaesenc xmm15, xmm15, xmm7
      vmovdqu xmm7, OWORD PTR [rax+224]
  L_AES_GCM_encrypt_update_avx2_aesenc_128_ghash_avx_done:
      ; aesenc_last
      vaesenclast xmm8, xmm8, xmm7
      vaesenclast xmm9, xmm9, xmm7
      vaesenclast xmm10, xmm10, xmm7
      vaesenclast xmm11, xmm11, xmm7
      vmovdqu xmm0, OWORD PTR [rcx]
      vmovdqu xmm1, OWORD PTR [rcx+16]
      vmovdqu xmm2, OWORD PTR [rcx+32]
      vmovdqu xmm3, OWORD PTR [rcx+48]
      vpxor xmm8, xmm8, xmm0
      vpxor xmm9, xmm9, xmm1
      vpxor xmm10, xmm10, xmm2
      vpxor xmm11, xmm11, xmm3
      vmovdqu OWORD PTR [rdx], xmm8
      vmovdqu OWORD PTR [rdx+16], xmm9
      vmovdqu OWORD PTR [rdx+32], xmm10
      vmovdqu OWORD PTR [rdx+48], xmm11
      vaesenclast xmm12, xmm12, xmm7
      vaesenclast xmm13, xmm13, xmm7
      vaesenclast xmm14, xmm14, xmm7
      vaesenclast xmm15, xmm15, xmm7
      vmovdqu xmm0, OWORD PTR [rcx+64]
      vmovdqu xmm1, OWORD PTR [rcx+80]
      vmovdqu xmm2, OWORD PTR [rcx+96]
      vmovdqu xmm3, OWORD PTR [rcx+112]
      vpxor xmm12, xmm12, xmm0
      vpxor xmm13, xmm13, xmm1
      vpxor xmm14, xmm14, xmm2
      vpxor xmm15, xmm15, xmm3
      vmovdqu OWORD PTR [rdx+64], xmm12
      vmovdqu OWORD PTR [rdx+80], xmm13
      vmovdqu OWORD PTR [rdx+96], xmm14
      vmovdqu OWORD PTR [rdx+112], xmm15
      ; aesenc_128_ghash - end
      add edi, 128
      cmp edi, r15d
      jl L_AES_GCM_encrypt_update_avx2_ghash_128
  L_AES_GCM_encrypt_update_avx2_end_128:
      ; Hash the last eight output blocks, still held in xmm8-xmm15, against
      ; the stored powers: xmm15 * [rsp] ... xmm8 * [rsp+112].
      vmovdqu xmm4, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpshufb xmm8, xmm8, xmm4
      vpshufb xmm9, xmm9, xmm4
      vpshufb xmm10, xmm10, xmm4
      vpshufb xmm11, xmm11, xmm4
      vpshufb xmm12, xmm12, xmm4
      vpshufb xmm13, xmm13, xmm4
      vpshufb xmm14, xmm14, xmm4
      vpshufb xmm15, xmm15, xmm4
      vpxor xmm8, xmm8, xmm6                       ; fold hash state into oldest block
      vmovdqu xmm7, OWORD PTR [rsp]
      vpclmulqdq xmm5, xmm7, xmm15, 16
      vpclmulqdq xmm1, xmm7, xmm15, 1
      vpclmulqdq xmm4, xmm7, xmm15, 0
      vpclmulqdq xmm6, xmm7, xmm15, 17
      vpxor xmm5, xmm5, xmm1
      vmovdqu xmm7, OWORD PTR [rsp+16]
      vpclmulqdq xmm2, xmm7, xmm14, 16
      vpclmulqdq xmm1, xmm7, xmm14, 1
      vpclmulqdq xmm0, xmm7, xmm14, 0
      vpclmulqdq xmm3, xmm7, xmm14, 17
      vpxor xmm2, xmm2, xmm1
      vpxor xmm6, xmm6, xmm3
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm0
      vmovdqu xmm15, OWORD PTR [rsp+32]
      vmovdqu xmm7, OWORD PTR [rsp+48]
      vpclmulqdq xmm2, xmm15, xmm13, 16
      vpclmulqdq xmm1, xmm15, xmm13, 1
      vpclmulqdq xmm0, xmm15, xmm13, 0
      vpclmulqdq xmm3, xmm15, xmm13, 17
      vpxor xmm2, xmm2, xmm1
      vpxor xmm6, xmm6, xmm3
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm0
      vpclmulqdq xmm2, xmm7, xmm12, 16
      vpclmulqdq xmm1, xmm7, xmm12, 1
      vpclmulqdq xmm0, xmm7, xmm12, 0
      vpclmulqdq xmm3, xmm7, xmm12, 17
      vpxor xmm2, xmm2, xmm1
      vpxor xmm6, xmm6, xmm3
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm0
      vmovdqu xmm15, OWORD PTR [rsp+64]
      vmovdqu xmm7, OWORD PTR [rsp+80]
      vpclmulqdq xmm2, xmm15, xmm11, 16
      vpclmulqdq xmm1, xmm15, xmm11, 1
      vpclmulqdq xmm0, xmm15, xmm11, 0
      vpclmulqdq xmm3, xmm15, xmm11, 17
      vpxor xmm2, xmm2, xmm1
      vpxor xmm6, xmm6, xmm3
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm0
      vpclmulqdq xmm2, xmm7, xmm10, 16
      vpclmulqdq xmm1, xmm7, xmm10, 1
      vpclmulqdq xmm0, xmm7, xmm10, 0
      vpclmulqdq xmm3, xmm7, xmm10, 17
      vpxor xmm2, xmm2, xmm1
      vpxor xmm6, xmm6, xmm3
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm0
      vmovdqu xmm15, OWORD PTR [rsp+96]
      vmovdqu xmm7, OWORD PTR [rsp+112]
      vpclmulqdq xmm2, xmm15, xmm9, 16
      vpclmulqdq xmm1, xmm15, xmm9, 1
      vpclmulqdq xmm0, xmm15, xmm9, 0
      vpclmulqdq xmm3, xmm15, xmm9, 17
      vpxor xmm2, xmm2, xmm1
      vpxor xmm6, xmm6, xmm3
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm0
      vpclmulqdq xmm2, xmm7, xmm8, 16
      vpclmulqdq xmm1, xmm7, xmm8, 1
      vpclmulqdq xmm0, xmm7, xmm8, 0
      vpclmulqdq xmm3, xmm7, xmm8, 17
      vpxor xmm2, xmm2, xmm1
      vpxor xmm6, xmm6, xmm3
      vpxor xmm5, xmm5, xmm2
      vpxor xmm4, xmm4, xmm0
      vpslldq xmm7, xmm5, 8
      vpsrldq xmm5, xmm5, 8
      vpxor xmm4, xmm4, xmm7
      vpxor xmm6, xmm6, xmm5
      ; ghash_red
      vmovdqu xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128
      vpclmulqdq xmm0, xmm4, xmm2, 16
      vpshufd xmm1, xmm4, 78
      vpxor xmm1, xmm1, xmm0
      vpclmulqdq xmm0, xmm1, xmm2, 16
      vpshufd xmm1, xmm1, 78
      vpxor xmm1, xmm1, xmm0
      vpxor xmm6, xmm6, xmm1
      vmovdqu xmm5, OWORD PTR [rsp]                ; reload multiplier
      vmovdqu xmm4, OWORD PTR [rsp+128]            ; reload running counter
  L_AES_GCM_encrypt_update_avx2_done_128:
      cmp edi, r9d
      je L_AES_GCM_encrypt_update_avx2_done_enc
      mov r15d, r9d
      and r15d, 4294967280                         ; r15d = byte count & ~15
      cmp edi, r15d
      jge L_AES_GCM_encrypt_update_avx2_last_block_done
      ; aesenc_block: encrypt a single block; its hash multiply is deferred
      vmovdqu xmm1, xmm4
      vpshufb xmm0, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
      vpaddd xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_one
      vpxor xmm0, xmm0, [rax]
      vmovdqu xmm2, OWORD PTR [rax+16]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+32]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+48]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+64]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+80]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+96]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+112]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+128]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm2, OWORD PTR [rax+144]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm4, xmm1                           ; commit incremented counter
      cmp r8d, 11
      vmovdqu xmm1, OWORD PTR [rax+160]
      jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last
      vaesenc xmm0, xmm0, xmm1
      vmovdqu xmm2, OWORD PTR [rax+176]
      vaesenc xmm0, xmm0, xmm2
      cmp r8d, 13
      vmovdqu xmm1, OWORD PTR [rax+192]
      jl L_AES_GCM_encrypt_update_avx2_aesenc_block_last
      vaesenc xmm0, xmm0, xmm1
      vmovdqu xmm2, OWORD PTR [rax+208]
      vaesenc xmm0, xmm0, xmm2
      vmovdqu xmm1, OWORD PTR [rax+224]
  L_AES_GCM_encrypt_update_avx2_aesenc_block_last:
      vaesenclast xmm0, xmm0, xmm1
      vmovdqu xmm1, OWORD PTR [r11+rdi]
      vpxor xmm0, xmm0, xmm1
      vmovdqu OWORD PTR [r10+rdi], xmm0
      vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm6, xmm6, xmm0                       ; absorb block; multiply happens later
      add edi, 16
      cmp edi, r15d
      jge L_AES_GCM_encrypt_update_avx2_last_block_ghash
  L_AES_GCM_encrypt_update_avx2_last_block_start:
      vmovdqu xmm12, OWORD PTR [r11+rdi]
      vpshufb xmm11, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
      vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
      ; aesenc_gfmul_sb: one AES block interleaved with the pending hash multiply
      vpclmulqdq xmm2, xmm6, xmm5, 1
      vpclmulqdq xmm3, xmm6, xmm5, 16
      vpclmulqdq xmm1, xmm6, xmm5, 0
      vpclmulqdq xmm8, xmm6, xmm5, 17
      vpxor xmm11, xmm11, [rax]
      vaesenc xmm11, xmm11, [rax+16]
      vpxor xmm3, xmm3, xmm2
      vpslldq xmm2, xmm3, 8
      vpsrldq xmm3, xmm3, 8
      vaesenc xmm11, xmm11, [rax+32]
      vpxor xmm2, xmm2, xmm1
      vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
      vaesenc xmm11, xmm11, [rax+48]
      vaesenc xmm11, xmm11, [rax+64]
      vaesenc xmm11, xmm11, [rax+80]
      vpshufd xmm2, xmm2, 78
      vpxor xmm2, xmm2, xmm1
      vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
      vaesenc xmm11, xmm11, [rax+96]
      vaesenc xmm11, xmm11, [rax+112]
      vaesenc xmm11, xmm11, [rax+128]
      vpshufd xmm2, xmm2, 78
      vaesenc xmm11, xmm11, [rax+144]
      vpxor xmm8, xmm8, xmm3
      vpxor xmm2, xmm2, xmm8
      vmovdqu xmm0, OWORD PTR [rax+160]
      cmp r8d, 11
      jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm11, xmm11, [rax+176]
      vmovdqu xmm0, OWORD PTR [rax+192]
      cmp r8d, 13
      jl L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last
      vaesenc xmm11, xmm11, xmm0
      vaesenc xmm11, xmm11, [rax+208]
      vmovdqu xmm0, OWORD PTR [rax+224]
  L_AES_GCM_encrypt_update_avx2_aesenc_gfmul_sb_last:
      vaesenclast xmm11, xmm11, xmm0
      vpxor xmm6, xmm2, xmm1                       ; xmm6 = reduced product
      vpxor xmm11, xmm11, xmm12
      vmovdqu OWORD PTR [r10+rdi], xmm11
      vpshufb xmm11, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
      vpxor xmm6, xmm6, xmm11                      ; absorb the new output block
      add edi, 16
      cmp edi, r15d
      jl L_AES_GCM_encrypt_update_avx2_last_block_start
  L_AES_GCM_encrypt_update_avx2_last_block_ghash:
      ; ghash_gfmul_red: final pending multiply and reduction
      vpclmulqdq xmm10, xmm6, xmm5, 16
      vpclmulqdq xmm9, xmm6, xmm5, 1
      vpclmulqdq xmm8, xmm6, xmm5, 0
      vpxor xmm10, xmm10, xmm9
      vpslldq xmm9, xmm10, 8
      vpsrldq xmm10, xmm10, 8
      vpxor xmm9, xmm9, xmm8
      vpclmulqdq xmm6, xmm6, xmm5, 17
      vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
      vpshufd xmm9, xmm9, 78
      vpxor xmm9, xmm9, xmm8
      vpclmulqdq xmm8, xmm9, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
      vpshufd xmm9, xmm9, 78
      vpxor xmm6, xmm6, xmm10
      vpxor xmm6, xmm6, xmm9
      vpxor xmm6, xmm6, xmm8
  L_AES_GCM_encrypt_update_avx2_last_block_done:
  L_AES_GCM_encrypt_update_avx2_done_enc:
      vmovdqu OWORD PTR [r12], xmm6                ; write back hash state
      vmovdqu OWORD PTR [r14], xmm4                ; write back counter
      vzeroupper
      ; Restore the nonvolatile XMM registers saved in the prologue.
      vmovdqu xmm6, OWORD PTR [rsp+152]
      vmovdqu xmm7, OWORD PTR [rsp+168]
      vmovdqu xmm8, OWORD PTR [rsp+184]
      vmovdqu xmm9, OWORD PTR [rsp+200]
      vmovdqu xmm10, OWORD PTR [rsp+216]
      vmovdqu xmm11, OWORD PTR [rsp+232]
      vmovdqu xmm12, OWORD PTR [rsp+248]
      vmovdqu xmm13, OWORD PTR [rsp+264]
      vmovdqu xmm14, OWORD PTR [rsp+280]
      vmovdqu xmm15, OWORD PTR [rsp+296]
      add rsp, 312
      pop rdi
      pop r15
      pop r14
      pop r13
      pop r12
      ret
  AES_GCM_encrypt_update_avx2 ENDP
  _text ENDS
  14763. _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_encrypt_final_avx2
  ; ABI:  Microsoft x64
  ; In:   rcx       = pointer to 16 bytes that are folded into the tag
  ;                   (presumably the running GHASH state - confirm with caller)
  ;       rdx       = output buffer that receives the tag
  ;       r8d       = number of tag bytes to write
  ;       r9        = byte count, converted to a bit count below
  ;       [rsp+40]  = 32-bit byte count, converted to a bit count below
  ;       [rsp+48]  = pointer to the 16-byte hash key H
  ;       [rsp+56]  = pointer to 16 bytes XORed into the final value
  ;                   (presumably the encrypted initial counter - confirm)
  ;       (stack offsets are relative to rsp at function entry)
  ; Out:  r8d bytes written at [rdx]
  ; Stack frame (48 bytes after the two pushes):
  ;       [rsp+0]   scratch copy of the tag for the byte-wise store
  ;       [rsp+16]  saved xmm6
  ;       [rsp+32]  saved xmm7
  ; xmm6 and xmm7 are callee-saved in the Microsoft x64 convention, so they
  ; are spilled on entry and reloaded before returning.
  ;-----------------------------------------------------------------------
  AES_GCM_encrypt_final_avx2 PROC
        push r12
        push r13
        ; Stack arguments are read before rsp is adjusted for the local frame.
        mov eax, DWORD PTR [rsp+56]
        mov r10, QWORD PTR [rsp+64]
        mov r11, QWORD PTR [rsp+72]
        sub rsp, 48
        ; Preserve the callee-saved vector registers used below.
        vmovdqu OWORD PTR [rsp+16], xmm6
        vmovdqu OWORD PTR [rsp+32], xmm7
        vmovdqu xmm4, OWORD PTR [rcx]
        vmovdqu xmm5, OWORD PTR [r10]
        vmovdqu xmm6, OWORD PTR [r11]
        ; Shift H left by one bit across the full 128 bits and, when the top
        ; bit was set, XOR in L_avx2_aes_gcm_mod2_128.
        vpsrlq xmm1, xmm5, 63
        vpsllq xmm0, xmm5, 1
        vpslldq xmm1, xmm1, 8
        vpor xmm0, xmm0, xmm1
        vpshufd xmm5, xmm5, 255
        vpsrad xmm5, xmm5, 31
        vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
        vpxor xmm5, xmm5, xmm0
        ; calc_tag
        ; Build the length block: low qword = r9 * 8, high qword = rax * 8.
        ; NOTE(review): r9 is shifted as a 64-bit value, so this relies on the
        ; caller having zero-extended the argument in r9 - confirm.
        shl r9, 3
        shl rax, 3
        vmovq xmm0, r9
        vmovq xmm1, rax
        vpunpcklqdq xmm0, xmm0, xmm1
        vpxor xmm0, xmm0, xmm4
        ; ghash_gfmul_red
        ; Carry-less multiply xmm0 by xmm5, then reduce with two folds by
        ; L_avx2_aes_gcm_mod2_128.
        vpclmulqdq xmm7, xmm0, xmm5, 16
        vpclmulqdq xmm3, xmm0, xmm5, 1
        vpclmulqdq xmm2, xmm0, xmm5, 0
        vpxor xmm7, xmm7, xmm3
        vpslldq xmm3, xmm7, 8
        vpsrldq xmm7, xmm7, 8
        vpxor xmm3, xmm3, xmm2
        vpclmulqdq xmm0, xmm0, xmm5, 17
        vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vpshufd xmm3, xmm3, 78
        vpxor xmm3, xmm3, xmm2
        vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vpshufd xmm3, xmm3, 78
        vpxor xmm0, xmm0, xmm7
        vpxor xmm0, xmm0, xmm3
        vpxor xmm0, xmm0, xmm2
        vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm0, xmm0, xmm6
        ; store_tag
        ; A 16-byte tag is stored with one vector write; any other size is
        ; copied byte by byte from the scratch slot at [rsp].
        cmp r8d, 16
        je L_AES_GCM_encrypt_final_avx2_store_tag_16
        xor r12, r12
        vmovdqu OWORD PTR [rsp], xmm0
  L_AES_GCM_encrypt_final_avx2_store_tag_loop:
        movzx r13d, BYTE PTR [rsp+r12]
        mov BYTE PTR [rdx+r12], r13b
        inc r12d
        cmp r12d, r8d
        jne L_AES_GCM_encrypt_final_avx2_store_tag_loop
        jmp L_AES_GCM_encrypt_final_avx2_store_tag_done
  L_AES_GCM_encrypt_final_avx2_store_tag_16:
        vmovdqu OWORD PTR [rdx], xmm0
  L_AES_GCM_encrypt_final_avx2_store_tag_done:
        ; Restore the callee-saved vector registers.
        vmovdqu xmm6, OWORD PTR [rsp+16]
        vmovdqu xmm7, OWORD PTR [rsp+32]
        vzeroupper
        add rsp, 48
        pop r13
        pop r12
        ret
  AES_GCM_encrypt_final_avx2 ENDP
  14829. _text ENDS
  14830. _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_decrypt_update_avx2
  ; ABI:  Microsoft x64
  ; In:   rcx       = base of the AES round keys (kept in rax)
  ;       edx       = round count; compared against 11 and 13 to pick the
  ;                   number of AES rounds (kept in r8d)
  ;       r8        = output buffer (kept in r10)
  ;       r9        = input buffer (kept in r11)
  ;       [rsp+40]  = 32-bit byte count (kept in r9d)
  ;       [rsp+48]  = pointer to the 16-byte GHASH state, read and written back
  ;       [rsp+56]  = pointer to the 16-byte hash key H
  ;       [rsp+64]  = pointer to the 16-byte counter, read and written back
  ;       (stack offsets are relative to rsp at function entry)
  ; Processing: whole 128-byte groups first, then whole 16-byte blocks.
  ;       A trailing partial block (< 16 bytes) is not handled here.
  ; Stack frame (312 bytes after the five pushes):
  ;       [rsp+0..127]    table of eight powers of H (H^1 at +0 .. H^8 at +112)
  ;       [rsp+128]       running counter during the 128-byte loop
  ;       [rsp+144]       saved xmm15 (128-byte path only)
  ;       [rsp+168..311]  saved xmm6..xmm14
  ; xmm6..xmm15 are callee-saved in the Microsoft x64 convention; xmm6..xmm14
  ; are spilled on entry and reloaded before returning.
  ; NOTE(review): the length checks use signed jl/jge, so a byte count with
  ; bit 31 set is treated as negative and nothing is processed - confirm
  ; callers never pass such a length.
  ;-----------------------------------------------------------------------
  AES_GCM_decrypt_update_avx2 PROC
        push r13
        push r12
        push r14
        push r15
        push rdi
        mov rax, rcx
        mov r10, r8
        mov r8d, edx
        mov r11, r9
        ; Stack arguments are read before rsp is adjusted for the local frame.
        mov r9d, DWORD PTR [rsp+80]
        mov r12, QWORD PTR [rsp+88]
        mov r14, QWORD PTR [rsp+96]
        mov r15, QWORD PTR [rsp+104]
        sub rsp, 312
        ; Preserve the callee-saved vector registers used below.
        vmovdqu OWORD PTR [rsp+168], xmm6
        vmovdqu OWORD PTR [rsp+184], xmm7
        vmovdqu OWORD PTR [rsp+200], xmm8
        vmovdqu OWORD PTR [rsp+216], xmm9
        vmovdqu OWORD PTR [rsp+232], xmm10
        vmovdqu OWORD PTR [rsp+248], xmm11
        vmovdqu OWORD PTR [rsp+264], xmm12
        vmovdqu OWORD PTR [rsp+280], xmm13
        vmovdqu OWORD PTR [rsp+296], xmm14
        vmovdqu xmm6, OWORD PTR [r12]
        vmovdqu xmm5, OWORD PTR [r14]
        vmovdqu xmm4, OWORD PTR [r15]
        ; Calculate H
        ; Shift H left by one bit across the full 128 bits and, when the top
        ; bit was set, XOR in L_avx2_aes_gcm_mod2_128.
        vpsrlq xmm1, xmm5, 63
        vpsllq xmm0, xmm5, 1
        vpslldq xmm1, xmm1, 8
        vpor xmm0, xmm0, xmm1
        vpshufd xmm5, xmm5, 255
        vpsrad xmm5, xmm5, 31
        vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
        vpxor xmm5, xmm5, xmm0
        ; edi = byte offset into the input/output buffers.
        xor edi, edi
        cmp r9d, 128
        mov r13d, r9d
        jl L_AES_GCM_decrypt_update_avx2_done_128
        ; r13d = byte count rounded down to a multiple of 128.
        and r13d, 4294967168
        vmovdqu OWORD PTR [rsp+128], xmm4
        vmovdqu OWORD PTR [rsp+144], xmm15
        vmovdqu xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128
        ; H ^ 1 and H ^ 2
        vpclmulqdq xmm9, xmm5, xmm5, 0
        vpclmulqdq xmm10, xmm5, xmm5, 17
        vpclmulqdq xmm8, xmm9, xmm3, 16
        vpshufd xmm9, xmm9, 78
        vpxor xmm9, xmm9, xmm8
        vpclmulqdq xmm8, xmm9, xmm3, 16
        vpshufd xmm9, xmm9, 78
        vpxor xmm9, xmm9, xmm8
        vpxor xmm0, xmm10, xmm9
        vmovdqu OWORD PTR [rsp], xmm5
        vmovdqu OWORD PTR [rsp+16], xmm0
        ; H ^ 3 and H ^ 4
        vpclmulqdq xmm11, xmm0, xmm5, 16
        vpclmulqdq xmm10, xmm0, xmm5, 1
        vpclmulqdq xmm9, xmm0, xmm5, 0
        vpclmulqdq xmm12, xmm0, xmm5, 17
        vpclmulqdq xmm13, xmm0, xmm0, 0
        vpclmulqdq xmm14, xmm0, xmm0, 17
        vpxor xmm11, xmm11, xmm10
        vpslldq xmm10, xmm11, 8
        vpsrldq xmm11, xmm11, 8
        vpxor xmm10, xmm10, xmm9
        vpclmulqdq xmm8, xmm13, xmm3, 16
        vpclmulqdq xmm9, xmm10, xmm3, 16
        vpshufd xmm10, xmm10, 78
        vpshufd xmm13, xmm13, 78
        vpxor xmm10, xmm10, xmm9
        vpxor xmm13, xmm13, xmm8
        vpclmulqdq xmm9, xmm10, xmm3, 16
        vpclmulqdq xmm8, xmm13, xmm3, 16
        vpshufd xmm10, xmm10, 78
        vpshufd xmm13, xmm13, 78
        vpxor xmm12, xmm12, xmm11
        vpxor xmm13, xmm13, xmm8
        vpxor xmm10, xmm10, xmm12
        vpxor xmm2, xmm13, xmm14
        vpxor xmm1, xmm10, xmm9
        vmovdqu OWORD PTR [rsp+32], xmm1
        vmovdqu OWORD PTR [rsp+48], xmm2
        ; H ^ 5 and H ^ 6
        vpclmulqdq xmm11, xmm1, xmm0, 16
        vpclmulqdq xmm10, xmm1, xmm0, 1
        vpclmulqdq xmm9, xmm1, xmm0, 0
        vpclmulqdq xmm12, xmm1, xmm0, 17
        vpclmulqdq xmm13, xmm1, xmm1, 0
        vpclmulqdq xmm14, xmm1, xmm1, 17
        vpxor xmm11, xmm11, xmm10
        vpslldq xmm10, xmm11, 8
        vpsrldq xmm11, xmm11, 8
        vpxor xmm10, xmm10, xmm9
        vpclmulqdq xmm8, xmm13, xmm3, 16
        vpclmulqdq xmm9, xmm10, xmm3, 16
        vpshufd xmm10, xmm10, 78
        vpshufd xmm13, xmm13, 78
        vpxor xmm10, xmm10, xmm9
        vpxor xmm13, xmm13, xmm8
        vpclmulqdq xmm9, xmm10, xmm3, 16
        vpclmulqdq xmm8, xmm13, xmm3, 16
        vpshufd xmm10, xmm10, 78
        vpshufd xmm13, xmm13, 78
        vpxor xmm12, xmm12, xmm11
        vpxor xmm13, xmm13, xmm8
        vpxor xmm10, xmm10, xmm12
        vpxor xmm0, xmm13, xmm14
        vpxor xmm7, xmm10, xmm9
        vmovdqu OWORD PTR [rsp+64], xmm7
        vmovdqu OWORD PTR [rsp+80], xmm0
        ; H ^ 7 and H ^ 8
        vpclmulqdq xmm11, xmm2, xmm1, 16
        vpclmulqdq xmm10, xmm2, xmm1, 1
        vpclmulqdq xmm9, xmm2, xmm1, 0
        vpclmulqdq xmm12, xmm2, xmm1, 17
        vpclmulqdq xmm13, xmm2, xmm2, 0
        vpclmulqdq xmm14, xmm2, xmm2, 17
        vpxor xmm11, xmm11, xmm10
        vpslldq xmm10, xmm11, 8
        vpsrldq xmm11, xmm11, 8
        vpxor xmm10, xmm10, xmm9
        vpclmulqdq xmm8, xmm13, xmm3, 16
        vpclmulqdq xmm9, xmm10, xmm3, 16
        vpshufd xmm10, xmm10, 78
        vpshufd xmm13, xmm13, 78
        vpxor xmm10, xmm10, xmm9
        vpxor xmm13, xmm13, xmm8
        vpclmulqdq xmm9, xmm10, xmm3, 16
        vpclmulqdq xmm8, xmm13, xmm3, 16
        vpshufd xmm10, xmm10, 78
        vpshufd xmm13, xmm13, 78
        vpxor xmm12, xmm12, xmm11
        vpxor xmm13, xmm13, xmm8
        vpxor xmm10, xmm10, xmm12
        vpxor xmm0, xmm13, xmm14
        vpxor xmm7, xmm10, xmm9
        vmovdqu OWORD PTR [rsp+96], xmm7
        vmovdqu OWORD PTR [rsp+112], xmm0
  L_AES_GCM_decrypt_update_avx2_ghash_128:
        ; aesenc_128_ghash
        ; rcx = input for this 128-byte group, rdx = output for this group.
        lea rcx, QWORD PTR [r11+rdi]
        lea rdx, QWORD PTR [r10+rdi]
        ; aesenc_ctr
        ; Form eight consecutive counter blocks in xmm8..xmm15 and store the
        ; counter advanced by eight back to [rsp+128].
        vmovdqu xmm0, OWORD PTR [rsp+128]
        vmovdqu xmm1, OWORD PTR L_avx2_aes_gcm_bswap_epi64
        vpaddd xmm9, xmm0, OWORD PTR L_avx2_aes_gcm_one
        vpshufb xmm8, xmm0, xmm1
        vpaddd xmm10, xmm0, OWORD PTR L_avx2_aes_gcm_two
        vpshufb xmm9, xmm9, xmm1
        vpaddd xmm11, xmm0, OWORD PTR L_avx2_aes_gcm_three
        vpshufb xmm10, xmm10, xmm1
        vpaddd xmm12, xmm0, OWORD PTR L_avx2_aes_gcm_four
        vpshufb xmm11, xmm11, xmm1
        vpaddd xmm13, xmm0, OWORD PTR L_avx2_aes_gcm_five
        vpshufb xmm12, xmm12, xmm1
        vpaddd xmm14, xmm0, OWORD PTR L_avx2_aes_gcm_six
        vpshufb xmm13, xmm13, xmm1
        vpaddd xmm15, xmm0, OWORD PTR L_avx2_aes_gcm_seven
        vpshufb xmm14, xmm14, xmm1
        vpaddd xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_eight
        vpshufb xmm15, xmm15, xmm1
        ; aesenc_xor
        vmovdqu xmm7, OWORD PTR [rax]
        vmovdqu OWORD PTR [rsp+128], xmm0
        vpxor xmm8, xmm8, xmm7
        vpxor xmm9, xmm9, xmm7
        vpxor xmm10, xmm10, xmm7
        vpxor xmm11, xmm11, xmm7
        vpxor xmm12, xmm12, xmm7
        vpxor xmm13, xmm13, xmm7
        vpxor xmm14, xmm14, xmm7
        vpxor xmm15, xmm15, xmm7
        ; aesenc_pclmul_1
        ; First input block is XORed with the running GHASH state (xmm6) and
        ; multiplied by the table entry at [rsp+112]; AES round 1 is interleaved.
        vmovdqu xmm1, OWORD PTR [rcx]
        vmovdqu xmm0, OWORD PTR [rax+16]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vmovdqu xmm2, OWORD PTR [rsp+112]
        vpxor xmm1, xmm1, xmm6
        vpclmulqdq xmm5, xmm1, xmm2, 16
        vpclmulqdq xmm3, xmm1, xmm2, 1
        vpclmulqdq xmm6, xmm1, xmm2, 0
        vpclmulqdq xmm7, xmm1, xmm2, 17
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_2
        vmovdqu xmm1, OWORD PTR [rcx+16]
        vmovdqu xmm0, OWORD PTR [rsp+96]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm5, xmm5, xmm3
        vpclmulqdq xmm2, xmm1, xmm0, 16
        vpclmulqdq xmm3, xmm1, xmm0, 1
        vpclmulqdq xmm4, xmm1, xmm0, 0
        vpclmulqdq xmm1, xmm1, xmm0, 17
        vmovdqu xmm0, OWORD PTR [rax+32]
        vpxor xmm7, xmm7, xmm1
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_n
        vmovdqu xmm1, OWORD PTR [rcx+32]
        vmovdqu xmm0, OWORD PTR [rsp+80]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm5, xmm5, xmm2
        vpclmulqdq xmm2, xmm1, xmm0, 16
        vpxor xmm5, xmm5, xmm3
        vpclmulqdq xmm3, xmm1, xmm0, 1
        vpxor xmm6, xmm6, xmm4
        vpclmulqdq xmm4, xmm1, xmm0, 0
        vpclmulqdq xmm1, xmm1, xmm0, 17
        vmovdqu xmm0, OWORD PTR [rax+48]
        vpxor xmm7, xmm7, xmm1
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_n
        vmovdqu xmm1, OWORD PTR [rcx+48]
        vmovdqu xmm0, OWORD PTR [rsp+64]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm5, xmm5, xmm2
        vpclmulqdq xmm2, xmm1, xmm0, 16
        vpxor xmm5, xmm5, xmm3
        vpclmulqdq xmm3, xmm1, xmm0, 1
        vpxor xmm6, xmm6, xmm4
        vpclmulqdq xmm4, xmm1, xmm0, 0
        vpclmulqdq xmm1, xmm1, xmm0, 17
        vmovdqu xmm0, OWORD PTR [rax+64]
        vpxor xmm7, xmm7, xmm1
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_n
        vmovdqu xmm1, OWORD PTR [rcx+64]
        vmovdqu xmm0, OWORD PTR [rsp+48]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm5, xmm5, xmm2
        vpclmulqdq xmm2, xmm1, xmm0, 16
        vpxor xmm5, xmm5, xmm3
        vpclmulqdq xmm3, xmm1, xmm0, 1
        vpxor xmm6, xmm6, xmm4
        vpclmulqdq xmm4, xmm1, xmm0, 0
        vpclmulqdq xmm1, xmm1, xmm0, 17
        vmovdqu xmm0, OWORD PTR [rax+80]
        vpxor xmm7, xmm7, xmm1
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_n
        vmovdqu xmm1, OWORD PTR [rcx+80]
        vmovdqu xmm0, OWORD PTR [rsp+32]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm5, xmm5, xmm2
        vpclmulqdq xmm2, xmm1, xmm0, 16
        vpxor xmm5, xmm5, xmm3
        vpclmulqdq xmm3, xmm1, xmm0, 1
        vpxor xmm6, xmm6, xmm4
        vpclmulqdq xmm4, xmm1, xmm0, 0
        vpclmulqdq xmm1, xmm1, xmm0, 17
        vmovdqu xmm0, OWORD PTR [rax+96]
        vpxor xmm7, xmm7, xmm1
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_n
        vmovdqu xmm1, OWORD PTR [rcx+96]
        vmovdqu xmm0, OWORD PTR [rsp+16]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm5, xmm5, xmm2
        vpclmulqdq xmm2, xmm1, xmm0, 16
        vpxor xmm5, xmm5, xmm3
        vpclmulqdq xmm3, xmm1, xmm0, 1
        vpxor xmm6, xmm6, xmm4
        vpclmulqdq xmm4, xmm1, xmm0, 0
        vpclmulqdq xmm1, xmm1, xmm0, 17
        vmovdqu xmm0, OWORD PTR [rax+112]
        vpxor xmm7, xmm7, xmm1
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_n
        vmovdqu xmm1, OWORD PTR [rcx+112]
        vmovdqu xmm0, OWORD PTR [rsp]
        vpshufb xmm1, xmm1, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm5, xmm5, xmm2
        vpclmulqdq xmm2, xmm1, xmm0, 16
        vpxor xmm5, xmm5, xmm3
        vpclmulqdq xmm3, xmm1, xmm0, 1
        vpxor xmm6, xmm6, xmm4
        vpclmulqdq xmm4, xmm1, xmm0, 0
        vpclmulqdq xmm1, xmm1, xmm0, 17
        vmovdqu xmm0, OWORD PTR [rax+128]
        vpxor xmm7, xmm7, xmm1
        vaesenc xmm8, xmm8, xmm0
        vaesenc xmm9, xmm9, xmm0
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm11, xmm11, xmm0
        vaesenc xmm12, xmm12, xmm0
        vaesenc xmm13, xmm13, xmm0
        vaesenc xmm14, xmm14, xmm0
        vaesenc xmm15, xmm15, xmm0
        ; aesenc_pclmul_l
        ; Combine the partial products and reduce; the new GHASH state ends
        ; up in xmm6. AES round 9 is interleaved.
        vpxor xmm5, xmm5, xmm2
        vpxor xmm6, xmm6, xmm4
        vpxor xmm5, xmm5, xmm3
        vpslldq xmm1, xmm5, 8
        vpsrldq xmm5, xmm5, 8
        vmovdqu xmm4, OWORD PTR [rax+144]
        vmovdqu xmm0, OWORD PTR L_avx2_aes_gcm_mod2_128
        vaesenc xmm8, xmm8, xmm4
        vpxor xmm6, xmm6, xmm1
        vpxor xmm7, xmm7, xmm5
        vpclmulqdq xmm3, xmm6, xmm0, 16
        vaesenc xmm9, xmm9, xmm4
        vaesenc xmm10, xmm10, xmm4
        vaesenc xmm11, xmm11, xmm4
        vpshufd xmm6, xmm6, 78
        vpxor xmm6, xmm6, xmm3
        vpclmulqdq xmm3, xmm6, xmm0, 16
        vaesenc xmm12, xmm12, xmm4
        vaesenc xmm13, xmm13, xmm4
        vaesenc xmm14, xmm14, xmm4
        vpshufd xmm6, xmm6, 78
        vpxor xmm6, xmm6, xmm3
        vpxor xmm6, xmm6, xmm7
        vaesenc xmm15, xmm15, xmm4
        ; Round count below 11: the key at [rax+160] is the last round key.
        cmp r8d, 11
        vmovdqu xmm7, OWORD PTR [rax+160]
        jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done
        vaesenc xmm8, xmm8, xmm7
        vaesenc xmm9, xmm9, xmm7
        vaesenc xmm10, xmm10, xmm7
        vaesenc xmm11, xmm11, xmm7
        vaesenc xmm12, xmm12, xmm7
        vaesenc xmm13, xmm13, xmm7
        vaesenc xmm14, xmm14, xmm7
        vaesenc xmm15, xmm15, xmm7
        vmovdqu xmm7, OWORD PTR [rax+176]
        vaesenc xmm8, xmm8, xmm7
        vaesenc xmm9, xmm9, xmm7
        vaesenc xmm10, xmm10, xmm7
        vaesenc xmm11, xmm11, xmm7
        vaesenc xmm12, xmm12, xmm7
        vaesenc xmm13, xmm13, xmm7
        vaesenc xmm14, xmm14, xmm7
        vaesenc xmm15, xmm15, xmm7
        ; Round count below 13: the key at [rax+192] is the last round key.
        cmp r8d, 13
        vmovdqu xmm7, OWORD PTR [rax+192]
        jl L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done
        vaesenc xmm8, xmm8, xmm7
        vaesenc xmm9, xmm9, xmm7
        vaesenc xmm10, xmm10, xmm7
        vaesenc xmm11, xmm11, xmm7
        vaesenc xmm12, xmm12, xmm7
        vaesenc xmm13, xmm13, xmm7
        vaesenc xmm14, xmm14, xmm7
        vaesenc xmm15, xmm15, xmm7
        vmovdqu xmm7, OWORD PTR [rax+208]
        vaesenc xmm8, xmm8, xmm7
        vaesenc xmm9, xmm9, xmm7
        vaesenc xmm10, xmm10, xmm7
        vaesenc xmm11, xmm11, xmm7
        vaesenc xmm12, xmm12, xmm7
        vaesenc xmm13, xmm13, xmm7
        vaesenc xmm14, xmm14, xmm7
        vaesenc xmm15, xmm15, xmm7
        vmovdqu xmm7, OWORD PTR [rax+224]
  L_AES_GCM_decrypt_update_avx2_aesenc_128_ghash_avx_done:
        ; aesenc_last
        ; Final AES round, XOR the keystream with the input and store.
        vaesenclast xmm8, xmm8, xmm7
        vaesenclast xmm9, xmm9, xmm7
        vaesenclast xmm10, xmm10, xmm7
        vaesenclast xmm11, xmm11, xmm7
        vmovdqu xmm0, OWORD PTR [rcx]
        vmovdqu xmm1, OWORD PTR [rcx+16]
        vmovdqu xmm2, OWORD PTR [rcx+32]
        vmovdqu xmm3, OWORD PTR [rcx+48]
        vpxor xmm8, xmm8, xmm0
        vpxor xmm9, xmm9, xmm1
        vpxor xmm10, xmm10, xmm2
        vpxor xmm11, xmm11, xmm3
        vmovdqu OWORD PTR [rdx], xmm8
        vmovdqu OWORD PTR [rdx+16], xmm9
        vmovdqu OWORD PTR [rdx+32], xmm10
        vmovdqu OWORD PTR [rdx+48], xmm11
        vaesenclast xmm12, xmm12, xmm7
        vaesenclast xmm13, xmm13, xmm7
        vaesenclast xmm14, xmm14, xmm7
        vaesenclast xmm15, xmm15, xmm7
        vmovdqu xmm0, OWORD PTR [rcx+64]
        vmovdqu xmm1, OWORD PTR [rcx+80]
        vmovdqu xmm2, OWORD PTR [rcx+96]
        vmovdqu xmm3, OWORD PTR [rcx+112]
        vpxor xmm12, xmm12, xmm0
        vpxor xmm13, xmm13, xmm1
        vpxor xmm14, xmm14, xmm2
        vpxor xmm15, xmm15, xmm3
        vmovdqu OWORD PTR [rdx+64], xmm12
        vmovdqu OWORD PTR [rdx+80], xmm13
        vmovdqu OWORD PTR [rdx+96], xmm14
        vmovdqu OWORD PTR [rdx+112], xmm15
        ; aesenc_128_ghash - end
        add edi, 128
        cmp edi, r13d
        jl L_AES_GCM_decrypt_update_avx2_ghash_128
        ; Reload H, the advanced counter and the caller's xmm15.
        vmovdqu xmm5, OWORD PTR [rsp]
        vmovdqu xmm4, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
  L_AES_GCM_decrypt_update_avx2_done_128:
        cmp edi, r9d
        jge L_AES_GCM_decrypt_update_avx2_done_dec
        ; r13d = byte count rounded down to a multiple of 16.
        mov r13d, r9d
        and r13d, 4294967280
        cmp edi, r13d
        jge L_AES_GCM_decrypt_update_avx2_last_block_done
  L_AES_GCM_decrypt_update_avx2_last_block_start:
        ; One 16-byte block per iteration: xmm11 = input block, xmm10 = counter
        ; block to encrypt, xmm12 = byte-swapped input XOR GHASH state.
        vmovdqu xmm11, OWORD PTR [r11+rdi]
        vpshufb xmm10, xmm4, OWORD PTR L_avx2_aes_gcm_bswap_epi64
        vpshufb xmm12, xmm11, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpaddd xmm4, xmm4, OWORD PTR L_avx2_aes_gcm_one
        vpxor xmm12, xmm12, xmm6
        ; aesenc_gfmul_sb
        vpclmulqdq xmm2, xmm12, xmm5, 1
        vpclmulqdq xmm3, xmm12, xmm5, 16
        vpclmulqdq xmm1, xmm12, xmm5, 0
        vpclmulqdq xmm8, xmm12, xmm5, 17
        vpxor xmm10, xmm10, [rax]
        vaesenc xmm10, xmm10, [rax+16]
        vpxor xmm3, xmm3, xmm2
        vpslldq xmm2, xmm3, 8
        vpsrldq xmm3, xmm3, 8
        vaesenc xmm10, xmm10, [rax+32]
        vpxor xmm2, xmm2, xmm1
        vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vaesenc xmm10, xmm10, [rax+48]
        vaesenc xmm10, xmm10, [rax+64]
        vaesenc xmm10, xmm10, [rax+80]
        vpshufd xmm2, xmm2, 78
        vpxor xmm2, xmm2, xmm1
        vpclmulqdq xmm1, xmm2, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vaesenc xmm10, xmm10, [rax+96]
        vaesenc xmm10, xmm10, [rax+112]
        vaesenc xmm10, xmm10, [rax+128]
        vpshufd xmm2, xmm2, 78
        vaesenc xmm10, xmm10, [rax+144]
        vpxor xmm8, xmm8, xmm3
        vpxor xmm2, xmm2, xmm8
        vmovdqu xmm0, OWORD PTR [rax+160]
        cmp r8d, 11
        jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm10, xmm10, [rax+176]
        vmovdqu xmm0, OWORD PTR [rax+192]
        cmp r8d, 13
        jl L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last
        vaesenc xmm10, xmm10, xmm0
        vaesenc xmm10, xmm10, [rax+208]
        vmovdqu xmm0, OWORD PTR [rax+224]
  L_AES_GCM_decrypt_update_avx2_aesenc_gfmul_sb_last:
        vaesenclast xmm10, xmm10, xmm0
        ; xmm6 = updated GHASH state.
        vpxor xmm6, xmm2, xmm1
        vpxor xmm10, xmm10, xmm11
        vmovdqu OWORD PTR [r10+rdi], xmm10
        add edi, 16
        cmp edi, r13d
        jl L_AES_GCM_decrypt_update_avx2_last_block_start
  L_AES_GCM_decrypt_update_avx2_last_block_done:
  L_AES_GCM_decrypt_update_avx2_done_dec:
        ; Write back the GHASH state and the counter.
        vmovdqu OWORD PTR [r12], xmm6
        vmovdqu OWORD PTR [r15], xmm4
        ; Restore the callee-saved vector registers.
        vmovdqu xmm6, OWORD PTR [rsp+168]
        vmovdqu xmm7, OWORD PTR [rsp+184]
        vmovdqu xmm8, OWORD PTR [rsp+200]
        vmovdqu xmm9, OWORD PTR [rsp+216]
        vmovdqu xmm10, OWORD PTR [rsp+232]
        vmovdqu xmm11, OWORD PTR [rsp+248]
        vmovdqu xmm12, OWORD PTR [rsp+264]
        vmovdqu xmm13, OWORD PTR [rsp+280]
        vmovdqu xmm14, OWORD PTR [rsp+296]
        vzeroupper
        add rsp, 312
        pop rdi
        pop r15
        pop r14
        pop r12
        pop r13
        ret
  AES_GCM_decrypt_update_avx2 ENDP
  15337. _text ENDS
  15338. _text SEGMENT READONLY PARA
  ;-----------------------------------------------------------------------
  ; AES_GCM_decrypt_final_avx2
  ; ABI:  Microsoft x64
  ; In:   rcx       = pointer to 16 bytes that are folded into the tag
  ;                   (presumably the running GHASH state - confirm with caller)
  ;       rdx       = tag bytes to compare against
  ;       r8d       = number of tag bytes to compare
  ;       r9        = byte count, converted to a bit count below
  ;       [rsp+40]  = 32-bit byte count, converted to a bit count below
  ;       [rsp+48]  = pointer to the 16-byte hash key H
  ;       [rsp+56]  = pointer to 16 bytes XORed into the final value
  ;                   (presumably the encrypted initial counter - confirm)
  ;       [rsp+64]  = pointer to a 32-bit result
  ;       (stack offsets are relative to rsp at function entry)
  ; Out:  result = 1 when the computed tag matches the supplied one, else 0
  ; Stack frame (48 bytes after the three pushes):
  ;       [rsp+0]   scratch copy of the computed tag for the byte-wise compare
  ;       [rsp+16]  saved xmm6
  ;       [rsp+32]  saved xmm7
  ; xmm6 and xmm7 are callee-saved in the Microsoft x64 convention, so they
  ; are spilled on entry and reloaded before returning.
  ;-----------------------------------------------------------------------
  AES_GCM_decrypt_final_avx2 PROC
        push r12
        push r13
        push r14
        ; Stack arguments are read before rsp is adjusted for the local frame.
        mov eax, DWORD PTR [rsp+64]
        mov r10, QWORD PTR [rsp+72]
        mov r11, QWORD PTR [rsp+80]
        mov r12, QWORD PTR [rsp+88]
        sub rsp, 48
        ; Preserve the callee-saved vector registers used below.
        vmovdqu OWORD PTR [rsp+16], xmm6
        vmovdqu OWORD PTR [rsp+32], xmm7
        vmovdqu xmm4, OWORD PTR [rcx]
        vmovdqu xmm5, OWORD PTR [r10]
        vmovdqu xmm6, OWORD PTR [r11]
        ; Shift H left by one bit across the full 128 bits and, when the top
        ; bit was set, XOR in L_avx2_aes_gcm_mod2_128.
        vpsrlq xmm1, xmm5, 63
        vpsllq xmm0, xmm5, 1
        vpslldq xmm1, xmm1, 8
        vpor xmm0, xmm0, xmm1
        vpshufd xmm5, xmm5, 255
        vpsrad xmm5, xmm5, 31
        vpand xmm5, xmm5, OWORD PTR L_avx2_aes_gcm_mod2_128
        vpxor xmm5, xmm5, xmm0
        ; calc_tag
        ; Build the length block: low qword = r9 * 8, high qword = rax * 8.
        ; NOTE(review): r9 is shifted as a 64-bit value, so this relies on the
        ; caller having zero-extended the argument in r9 - confirm.
        shl r9, 3
        shl rax, 3
        vmovq xmm0, r9
        vmovq xmm1, rax
        vpunpcklqdq xmm0, xmm0, xmm1
        vpxor xmm0, xmm0, xmm4
        ; ghash_gfmul_red
        ; Carry-less multiply xmm0 by xmm5, then reduce with two folds by
        ; L_avx2_aes_gcm_mod2_128.
        vpclmulqdq xmm7, xmm0, xmm5, 16
        vpclmulqdq xmm3, xmm0, xmm5, 1
        vpclmulqdq xmm2, xmm0, xmm5, 0
        vpxor xmm7, xmm7, xmm3
        vpslldq xmm3, xmm7, 8
        vpsrldq xmm7, xmm7, 8
        vpxor xmm3, xmm3, xmm2
        vpclmulqdq xmm0, xmm0, xmm5, 17
        vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vpshufd xmm3, xmm3, 78
        vpxor xmm3, xmm3, xmm2
        vpclmulqdq xmm2, xmm3, OWORD PTR L_avx2_aes_gcm_mod2_128, 16
        vpshufd xmm3, xmm3, 78
        vpxor xmm0, xmm0, xmm7
        vpxor xmm0, xmm0, xmm3
        vpxor xmm0, xmm0, xmm2
        vpshufb xmm0, xmm0, OWORD PTR L_avx2_aes_gcm_bswap_mask
        vpxor xmm0, xmm0, xmm6
        ; cmp_tag
        ; A 16-byte tag is compared with one vector compare; any other size is
        ; compared byte by byte, OR-ing all differences into r10b so the loop
        ; does not exit early on a mismatch.
        cmp r8d, 16
        je L_AES_GCM_decrypt_final_avx2_cmp_tag_16
        xor r13, r13
        xor r10, r10
        vmovdqu OWORD PTR [rsp], xmm0
  L_AES_GCM_decrypt_final_avx2_cmp_tag_loop:
        movzx r14d, BYTE PTR [rsp+r13]
        xor r14b, BYTE PTR [rdx+r13]
        or r10b, r14b
        inc r13d
        cmp r13d, r8d
        jne L_AES_GCM_decrypt_final_avx2_cmp_tag_loop
        ; r10 = 1 when no byte differed, else 0.
        cmp r10, 0
        sete r10b
        jmp L_AES_GCM_decrypt_final_avx2_cmp_tag_done
  L_AES_GCM_decrypt_final_avx2_cmp_tag_16:
        vmovdqu xmm1, OWORD PTR [rdx]
        vpcmpeqb xmm0, xmm0, xmm1
        vpmovmskb r13, xmm0
        ; r13d == 0xFFFF (all 16 bytes equal) => result 1, otherwise 0
        xor r10d, r10d
        cmp r13d, 65535
        sete r10b
  L_AES_GCM_decrypt_final_avx2_cmp_tag_done:
        mov DWORD PTR [r12], r10d
        ; Restore the callee-saved vector registers.
        vmovdqu xmm6, OWORD PTR [rsp+16]
        vmovdqu xmm7, OWORD PTR [rsp+32]
        vzeroupper
        add rsp, 48
        pop r14
        pop r13
        pop r12
        ret
  AES_GCM_decrypt_final_avx2 ENDP
  15418. _text ENDS
  15419. ENDIF
  15420. END