|
|
@@ -1,76932 +0,0 @@
|
|
|
-; /* sp_x86_64_asm
|
|
|
-; *
|
|
|
-; * Copyright (C) 2006-2023 wolfSSL Inc.
|
|
|
-; *
|
|
|
-; * This file is part of wolfSSL.
|
|
|
-; *
|
|
|
-; * wolfSSL is free software; you can redistribute it and/or modify
|
|
|
-; * it under the terms of the GNU General Public License as published by
|
|
|
-; * the Free Software Foundation; either version 2 of the License, or
|
|
|
-; * (at your option) any later version.
|
|
|
-; *
|
|
|
-; * wolfSSL is distributed in the hope that it will be useful,
|
|
|
-; * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
-; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
-; * GNU General Public License for more details.
|
|
|
-; *
|
|
|
-; * You should have received a copy of the GNU General Public License
|
|
|
-; * along with this program; if not, write to the Free Software
|
|
|
-; * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA
|
|
|
-; */
|
|
|
; Assembler capability switches.
;
; MASM older than version 12.00 cannot assemble AVX2 or MOVBE instructions, so
; the matching NO_*_SUPPORT symbols are forced on for those assemblers unless
; the build has already defined them.
IF @Version LT 1200
; AVX2 instructions not recognized by old versions of MASM
IFNDEF NO_AVX2_SUPPORT
NO_AVX2_SUPPORT = 1
ENDIF
; MOVBE instruction not recognized by old versions of MASM
IFNDEF NO_MOVBE_SUPPORT
NO_MOVBE_SUPPORT = 1
ENDIF
ENDIF

; AVX1 code is enabled by default.
IFNDEF HAVE_INTEL_AVX1
HAVE_INTEL_AVX1 = 1
ENDIF
; AVX2 code is enabled unless it has been ruled out above or by the build.
; The inner guard mirrors the HAVE_INTEL_AVX1 one so that a build which has
; already defined HAVE_INTEL_AVX2 does not collide with this assignment.
IFNDEF NO_AVX2_SUPPORT
IFNDEF HAVE_INTEL_AVX2
HAVE_INTEL_AVX2 = 1
ENDIF
ENDIF

IFNDEF _WIN64
_WIN64 = 1
ENDIF
|
|
|
-
|
|
|
-IFNDEF WOLFSSL_SP_NO_2048
|
|
|
-IFNDEF WOLFSSL_SP_NO_2048
|
|
|
; /* Read big endian unsigned byte array into r.
;  * Uses the bswap instruction.
;  *
;  * Arguments, in the registers this code reads them from (Microsoft x64
;  * argument order):
;  *   rcx  r     Output array of 64-bit words, least significant word first.
;  *   rdx  size  Never read here; the output length is fixed at 256 bytes.
;  *   r8   a     Big endian byte array.
;  *   r9   n     Number of bytes in a to read.
;  *
;  * Words are produced from the tail of a towards its head: 64 bytes at a
;  * time, then 8 bytes at a time, then any 1-7 leading bytes are packed into
;  * one final word. Words still left below r + 256 are then zeroed.
;  *
;  * Nothing clamps n: every 8 input bytes store one output word, so the
;  * caller must keep n <= 256 to stay inside the 256 byte output.
;  * NOTE(review): n is compared as a signed 64-bit value in r9 - confirm the
;  * caller passes it extended to 64 bits.
;  * NOTE(review): r12/r13 are pushed without PROC FRAME unwind directives -
;  * confirm unwind data is not required for this build.
;  *
;  * Clobbers rax, rcx, r8, r9, r10, r11 and flags; r12 and r13 are saved and
;  * restored.
;  */
_text SEGMENT READONLY PARA
sp_2048_from_bin_bswap PROC
        push    r12
        push    r13
        mov     r11, r8
        mov     r12, rcx
        add     r11, r9                         ; r11 = a + n (one past last input byte)
        add     r12, 256                        ; r12 = r + 256 (one past last output word)
        xor     r13, r13                        ; r13 = 0 for the rest of the routine
        jmp     L_2048_from_bin_bswap_64_end
L_2048_from_bin_bswap_64_start:
        ; Convert the last 64 unread input bytes into 8 output words.
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_2048_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_2048_from_bin_bswap_64_start  ; at least 64 bytes still unread
        jmp     L_2048_from_bin_bswap_8_end
L_2048_from_bin_bswap_8_start:
        ; Convert the last 8 unread input bytes into one output word.
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_2048_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_2048_from_bin_bswap_8_start   ; at least 8 bytes still unread
        cmp     r9, r13
        je      L_2048_from_bin_bswap_hi_end    ; no partial leading word
        mov     r10, r13                        ; r10 = accumulated top word
        mov     rax, r13                        ; keep rax[63:8] clear for the al loads
L_2048_from_bin_bswap_hi_start:
        ; Leading bytes are read forward from the start of a, most significant first.
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_2048_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_2048_from_bin_bswap_hi_end:
        ; Addresses are unsigned quantities, so use unsigned condition codes.
        cmp     rcx, r12
        jae     L_2048_from_bin_bswap_zero_end
L_2048_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], r13            ; zero the unused high words
        add     rcx, 8
        cmp     rcx, r12
        jb      L_2048_from_bin_bswap_zero_start
L_2048_from_bin_bswap_zero_end:
        pop     r13
        pop     r12
        ret
sp_2048_from_bin_bswap ENDP
_text ENDS
|
|
|
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
;  * Uses the movbe instruction which is an optional instruction.
;  *
;  * Arguments, in the registers this code reads them from (Microsoft x64
;  * argument order):
;  *   rcx  r     Output array of 64-bit words, least significant word first.
;  *   rdx  size  Never read here; the output length is fixed at 256 bytes.
;  *   r8   a     Big endian byte array.
;  *   r9   n     Number of bytes in a to read.
;  *
;  * Words are produced from the tail of a towards its head: 64 bytes at a
;  * time, then 8 bytes at a time, then any 1-7 leading bytes are packed into
;  * one final word. Words still left below r + 256 are then zeroed.
;  *
;  * Nothing clamps n: every 8 input bytes store one output word, so the
;  * caller must keep n <= 256 to stay inside the 256 byte output.
;  * NOTE(review): n is compared as a signed 64-bit value in r9 - confirm the
;  * caller passes it extended to 64 bits.
;  * NOTE(review): r12 is pushed without PROC FRAME unwind directives -
;  * confirm unwind data is not required for this build.
;  *
;  * Clobbers rax, rcx, r8, r9, r10, r11 and flags; r12 is saved and restored.
;  */
_text SEGMENT READONLY PARA
sp_2048_from_bin_movbe PROC
        push    r12
        mov     r11, r8
        mov     r12, rcx
        add     r11, r9                         ; r11 = a + n (one past last input byte)
        add     r12, 256                        ; r12 = r + 256 (one past last output word)
        jmp     L_2048_from_bin_movbe_64_end
L_2048_from_bin_movbe_64_start:
        ; Convert the last 64 unread input bytes into 8 output words.
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
L_2048_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_2048_from_bin_movbe_64_start  ; at least 64 bytes still unread
        jmp     L_2048_from_bin_movbe_8_end
L_2048_from_bin_movbe_8_start:
        ; Convert the last 8 unread input bytes into one output word.
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_2048_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_2048_from_bin_movbe_8_start   ; at least 8 bytes still unread
        test    r9, r9
        jz      L_2048_from_bin_movbe_hi_end    ; no partial leading word
        xor     r10d, r10d                      ; r10 = accumulated top word
        xor     eax, eax                        ; keep rax[63:8] clear for the al loads
L_2048_from_bin_movbe_hi_start:
        ; Leading bytes are read forward from the start of a, most significant first.
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_2048_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_2048_from_bin_movbe_hi_end:
        ; Addresses are unsigned quantities, so use unsigned condition codes.
        cmp     rcx, r12
        jae     L_2048_from_bin_movbe_zero_end
L_2048_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0              ; zero the unused high words
        add     rcx, 8
        cmp     rcx, r12
        jb      L_2048_from_bin_movbe_zero_start
L_2048_from_bin_movbe_zero_end:
        pop     r12
        ret
sp_2048_from_bin_movbe ENDP
_text ENDS
ENDIF
|
|
|
; /* Serialise the 32-word integer r as 256 big endian bytes.
;  * Byte order is reversed with the bswap instruction.
;  *
;  *   rcx  r  Source: 32 64-bit words, least significant word first.
;  *   rdx  a  Destination byte array; exactly 256 bytes are written.
;  *
;  * The most significant word (r[31]) lands at a[0..7] and the least
;  * significant word (r[0]) at a[248..255].
;  * Uses rax and r8 as scratch; no stack is touched.
;  */
_text SEGMENT READONLY PARA
sp_2048_to_bin_bswap_32 PROC
        ; Assemble-time byte offset into a. Each repetition handles two words:
        ; the pair read from r at 248-offset / 240-offset is written to a at
        ; offset / offset+8, so the block expands to 16 straight-line groups.
sp_2048_to_bin_bswap_32_dst = 0
REPT 16
        mov     rax, QWORD PTR [rcx+248-sp_2048_to_bin_bswap_32_dst]
        mov     r8, QWORD PTR [rcx+240-sp_2048_to_bin_bswap_32_dst]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+sp_2048_to_bin_bswap_32_dst], rax
        mov     QWORD PTR [rdx+sp_2048_to_bin_bswap_32_dst+8], r8
sp_2048_to_bin_bswap_32_dst = sp_2048_to_bin_bswap_32_dst + 16
ENDM
        ret
sp_2048_to_bin_bswap_32 ENDP
_text ENDS
|
|
|
IFNDEF NO_MOVBE_SUPPORT
; /* Serialise the 32-word integer r as 256 big endian bytes.
;  * Byte order is reversed by loading with the optional movbe instruction.
;  *
;  *   rcx  r  Source: 32 64-bit words, least significant word first.
;  *   rdx  a  Destination byte array; exactly 256 bytes are written.
;  *
;  * The most significant word (r[31]) lands at a[0..7] and the least
;  * significant word (r[0]) at a[248..255].
;  * Uses rax and r8 as scratch; no stack is touched.
;  */
_text SEGMENT READONLY PARA
sp_2048_to_bin_movbe_32 PROC
        ; Assemble-time byte offset into a. Each repetition handles two words:
        ; the pair read from r at 248-offset / 240-offset is written to a at
        ; offset / offset+8, so the block expands to 16 straight-line groups.
sp_2048_to_bin_movbe_32_dst = 0
REPT 16
        movbe   rax, QWORD PTR [rcx+248-sp_2048_to_bin_movbe_32_dst]
        movbe   r8, QWORD PTR [rcx+240-sp_2048_to_bin_movbe_32_dst]
        mov     QWORD PTR [rdx+sp_2048_to_bin_movbe_32_dst], rax
        mov     QWORD PTR [rdx+sp_2048_to_bin_movbe_32_dst+8], r8
sp_2048_to_bin_movbe_32_dst = sp_2048_to_bin_movbe_32_dst + 16
ENDM
        ret
sp_2048_to_bin_movbe_32 ENDP
_text ENDS
ENDIF
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mul_16 PROC
|
|
|
- push r12
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 128
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- mov QWORD PTR [rsp], rax
|
|
|
- mov r11, rdx
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+16], r12
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+24], r10
|
|
|
- ; A[0] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+32], r11
|
|
|
- ; A[0] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+40], r12
|
|
|
- ; A[0] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- ; A[0] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- ; A[0] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+64], r12
|
|
|
- ; A[0] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+72], r10
|
|
|
- ; A[0] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+80], r11
|
|
|
- ; A[0] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+88], r12
|
|
|
- ; A[0] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- ; A[0] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- ; A[0] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+112], r12
|
|
|
- ; A[0] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+120], r10
|
|
|
- ; A[1] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- ; A[2] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- ; A[3] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- ; A[4] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- ; A[5] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- ; A[6] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- ; A[7] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- ; A[8] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- ; A[9] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- ; A[10] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- ; A[11] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- ; A[12] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- ; A[13] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- ; A[14] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- ; A[15] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- mov rax, QWORD PTR [rsp]
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r10, QWORD PTR [rsp+48]
|
|
|
- mov r11, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov rdx, QWORD PTR [rsp+72]
|
|
|
- mov r10, QWORD PTR [rsp+80]
|
|
|
- mov r11, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], rdx
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rsp+96]
|
|
|
- mov rdx, QWORD PTR [rsp+104]
|
|
|
- mov r10, QWORD PTR [rsp+112]
|
|
|
- mov r11, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], rdx
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- add rsp, 128
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_mul_16 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r using MULX with ADCX/ADOX carry chains. (r = a * b)
|
|
|
-; *
|
|
|
-; * r Result of multiplication (pointer passed in rcx).
|
|
|
-; * a First number to multiply (pointer passed in rdx).
|
|
|
-; * b Second number to multiply (pointer passed in r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mul_avx2_16 PROC
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- mov rbp, r8
|
|
|
- mov r8, rcx
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 128
|
|
|
- cmp r9, r8
|
|
|
- mov rbx, rsp
|
|
|
- cmovne rbx, r8
|
|
|
- cmp rbp, r8
|
|
|
- cmove rbx, rsp
|
|
|
- add r8, 128
|
|
|
- xor rdi, rdi
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- ; A[0] * B[0]
|
|
|
- mulx r11, r10, QWORD PTR [rbp]
|
|
|
- ; A[0] * B[1]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx], r10
|
|
|
- adcx r11, rax
|
|
|
- ; A[0] * B[2]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- adcx r12, rax
|
|
|
- ; A[0] * B[3]
|
|
|
- mulx r14, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- ; A[0] * B[4]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- ; A[0] * B[5]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- ; A[0] * B[6]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- ; A[0] * B[7]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- ; A[0] * B[8]
|
|
|
- mulx r14, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- ; A[0] * B[9]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- ; A[0] * B[10]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- ; A[0] * B[11]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- ; A[0] * B[12]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- ; A[0] * B[13]
|
|
|
- mulx r14, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- ; A[0] * B[14]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- ; A[0] * B[15]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rdi
|
|
|
- mov r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mov r11, QWORD PTR [rbx+8]
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- ; A[1] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[1] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[1] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- ; A[1] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[1] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- ; A[1] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[1] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[1] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[1] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[1] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, rdi
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- ; A[2] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[2] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[2] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- ; A[2] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[2] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- ; A[2] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[2] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[2] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[2] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[2] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, rdi
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- ; A[3] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[3] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- ; A[3] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[3] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- ; A[3] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[3] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[3] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[3] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[3] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, rdi
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- ; A[4] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[4] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- ; A[4] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[4] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[4] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[4] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[4] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[4] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[4] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[4] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[4] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- ; A[5] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[5] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- ; A[5] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[5] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[5] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[5] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[5] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[5] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov r11, rdi
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- ; A[6] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[6] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[6] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- ; A[6] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[6] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[6] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[6] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[6] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, rdi
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- ; A[7] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[7] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- ; A[7] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[7] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[7] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[7] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- ; A[7] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[7] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r13, rdi
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- ; A[8] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[8] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[8] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[8] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[8] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[8] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[8] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[8] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- ; A[8] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[8] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r14, rdi
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- ; A[9] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[9] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[9] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[9] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[9] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[9] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[9] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- ; A[9] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[9] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[9] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[9] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov r10, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- ; A[10] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[10] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[10] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- ; A[10] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[10] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[10] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[10] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[10] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov r11, rdi
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- ; A[11] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[11] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[11] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[11] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[11] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- ; A[11] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- ; A[11] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[11] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov r12, rdi
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[12] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[12] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[12] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[12] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[12] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[12] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[12] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- ; A[12] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[12] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- ; A[12] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[12] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov r13, rdi
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[13] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[13] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[13] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[13] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[13] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[13] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[13] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[13] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[13] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[13] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- mov r13, QWORD PTR [r8+96]
|
|
|
- ; A[13] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[13] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- mov r14, rdi
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- mov QWORD PTR [r8+104], r14
|
|
|
- mov rdx, QWORD PTR [r9+112]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[14] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- ; A[14] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[14] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- ; A[14] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[14] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[14] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- mov r13, QWORD PTR [r8+96]
|
|
|
- mov r14, QWORD PTR [r8+104]
|
|
|
- ; A[14] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[14] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[14] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- mov r10, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+104], r14
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov rdx, QWORD PTR [r9+120]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[15] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[15] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- ; A[15] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- ; A[15] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[15] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov r13, QWORD PTR [r8+96]
|
|
|
- mov r14, QWORD PTR [r8+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- ; A[15] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[15] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[15] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+104], r14
|
|
|
- mov r11, rdi
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov QWORD PTR [r8+120], r11
|
|
|
- sub r8, 128
|
|
|
- cmp r9, r8
|
|
|
- je L_start_2048_mul_avx2_16
|
|
|
- cmp rbp, r8
|
|
|
- jne L_end_2048_mul_avx2_16
|
|
|
-L_start_2048_mul_avx2_16:
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx]
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+32]
|
|
|
- vmovups OWORD PTR [r8+32], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+48]
|
|
|
- vmovups OWORD PTR [r8+48], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+64]
|
|
|
- vmovups OWORD PTR [r8+64], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+80]
|
|
|
- vmovups OWORD PTR [r8+80], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+96]
|
|
|
- vmovups OWORD PTR [r8+96], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+112]
|
|
|
- vmovups OWORD PTR [r8+112], xmm0
|
|
|
-L_end_2048_mul_avx2_16:
|
|
|
- add rsp, 128
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- ret
|
|
|
-sp_2048_mul_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Add b to a into r. (r = a + b) Operands are 16 x 64-bit words; the carry out is returned in rax.
|
|
|
-; *
|
|
|
-; * r A single precision integer. Receives the 16-word sum; written through rcx.
|
|
|
-; * a A single precision integer. First operand; read through rdx.
|
|
|
-; * b A single precision integer. Second operand; read through r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_add_16 PROC
|
|
|
- ; Add: one add followed by 15 adc; word k of a is loaded before word k-1 of r is stored
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- xor rax, rax ; rax = 0; the flags this sets are replaced by the add below
|
|
|
- add r9, QWORD PTR [r8] ; word 0: a[0] + b[0], starts the carry chain
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [rcx], r9 ; r[0]; mov does not modify CF, so the carry reaches the adc
|
|
|
- adc r10, QWORD PTR [r8+8] ; word 1: a[1] + b[1] + carry
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- adc r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- adc r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- adc r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- adc r10, QWORD PTR [r8+120] ; word 15, the last word of the chain
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc rax, 0 ; rax = 0 + carry out of word 15, i.e. 0 or 1
|
|
|
- ret
|
|
|
-sp_2048_add_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into a. (a -= b) Operands are 32 x 64-bit words; rax returns 0, or all ones if the subtraction borrowed.
|
|
|
-; *
|
|
|
-; * a A single precision integer and result. Read and overwritten through rcx.
|
|
|
-; * b A single precision integer. Read through rdx.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_sub_in_place_32 PROC
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- sub r8, QWORD PTR [rdx] ; word 0: a[0] - b[0], starts the borrow chain
|
|
|
- mov r9, QWORD PTR [rcx+8] ; a[1] is loaded before a[0] is stored back
|
|
|
- mov QWORD PTR [rcx], r8 ; mov does not modify CF, so the borrow reaches the sbb
|
|
|
- sbb r9, QWORD PTR [rdx+8] ; word 1: a[1] - b[1] - borrow
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb r9, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb r9, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb r9, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb r9, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb r9, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb r9, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb r9, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov r9, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- sbb r9, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], r9
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov r9, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- sbb r9, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], r9
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov r9, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- sbb r9, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], r9
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov r9, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- sbb r9, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rcx+184], r9
|
|
|
- sbb r8, QWORD PTR [rdx+192]
|
|
|
- mov r9, QWORD PTR [rcx+200]
|
|
|
- mov QWORD PTR [rcx+192], r8
|
|
|
- sbb r9, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rcx+200], r9
|
|
|
- sbb r8, QWORD PTR [rdx+208]
|
|
|
- mov r9, QWORD PTR [rcx+216]
|
|
|
- mov QWORD PTR [rcx+208], r8
|
|
|
- sbb r9, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rcx+216], r9
|
|
|
- sbb r8, QWORD PTR [rdx+224]
|
|
|
- mov r9, QWORD PTR [rcx+232]
|
|
|
- mov QWORD PTR [rcx+224], r8
|
|
|
- sbb r9, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rcx+232], r9
|
|
|
- sbb r8, QWORD PTR [rdx+240]
|
|
|
- mov r9, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [rcx+240], r8
|
|
|
- sbb r9, QWORD PTR [rdx+248] ; word 31, the last word of the chain
|
|
|
- mov QWORD PTR [rcx+248], r9
|
|
|
- sbb rax, rax ; rax = rax - rax - CF: 0 when there was no final borrow, all ones when there was
|
|
|
- ret
|
|
|
-sp_2048_sub_in_place_32 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Add b to a into r. (r = a + b) Operands are 32 x 64-bit words; the carry out is returned in rax.
|
|
|
-; *
|
|
|
-; * r A single precision integer. Receives the 32-word sum; written through rcx.
|
|
|
-; * a A single precision integer. First operand; read through rdx.
|
|
|
-; * b A single precision integer. Second operand; read through r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_add_32 PROC
|
|
|
- ; Add: one add followed by 31 adc; word k of a is loaded before word k-1 of r is stored
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- xor rax, rax ; rax = 0; the flags this sets are replaced by the add below
|
|
|
- add r9, QWORD PTR [r8] ; word 0: a[0] + b[0], starts the carry chain
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [rcx], r9 ; r[0]; mov does not modify CF, so the carry reaches the adc
|
|
|
- adc r10, QWORD PTR [r8+8] ; word 1: a[1] + b[1] + carry
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- adc r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- adc r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- adc r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- adc r10, QWORD PTR [r8+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r9, QWORD PTR [r8+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [rcx+128], r9
|
|
|
- adc r10, QWORD PTR [r8+136]
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [rcx+136], r10
|
|
|
- adc r9, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- adc r10, QWORD PTR [r8+152]
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- adc r9, QWORD PTR [r8+160]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [rcx+160], r9
|
|
|
- adc r10, QWORD PTR [r8+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r9, QWORD PTR [r8+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [rcx+176], r9
|
|
|
- adc r10, QWORD PTR [r8+184]
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [rcx+184], r10
|
|
|
- adc r9, QWORD PTR [r8+192]
|
|
|
- mov r10, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [rcx+192], r9
|
|
|
- adc r10, QWORD PTR [r8+200]
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [rcx+200], r10
|
|
|
- adc r9, QWORD PTR [r8+208]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [rcx+208], r9
|
|
|
- adc r10, QWORD PTR [r8+216]
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- adc r9, QWORD PTR [r8+224]
|
|
|
- mov r10, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [rcx+224], r9
|
|
|
- adc r10, QWORD PTR [r8+232]
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [rcx+232], r10
|
|
|
- adc r9, QWORD PTR [r8+240]
|
|
|
- mov r10, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [rcx+240], r9
|
|
|
- adc r10, QWORD PTR [r8+248] ; word 31, the last word of the chain
|
|
|
- mov QWORD PTR [rcx+248], r10
|
|
|
- adc rax, 0 ; rax = 0 + carry out of word 31, i.e. 0 or 1
|
|
|
- ret
|
|
|
-sp_2048_add_32 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mul_32 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 808
|
|
|
- mov QWORD PTR [rsp+768], rcx
|
|
|
- mov QWORD PTR [rsp+776], rdx
|
|
|
- mov QWORD PTR [rsp+784], r8
|
|
|
- lea r12, QWORD PTR [rsp+512]
|
|
|
- lea r14, QWORD PTR [rdx+128]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+792], r15
|
|
|
- lea r13, QWORD PTR [rsp+640]
|
|
|
- lea r14, QWORD PTR [r8+128]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [r8+104]
|
|
|
- mov QWORD PTR [r13+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [r13+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov QWORD PTR [r13+120], rax
|
|
|
- adc rdi, 0
|
|
|
- mov QWORD PTR [rsp+800], rdi
|
|
|
- mov r8, r13
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp
|
|
|
- call sp_2048_mul_16
|
|
|
- mov r8, QWORD PTR [rsp+784]
|
|
|
- mov rdx, QWORD PTR [rsp+776]
|
|
|
- lea rcx, QWORD PTR [rsp+256]
|
|
|
- add r8, 128
|
|
|
- add rdx, 128
|
|
|
- call sp_2048_mul_16
|
|
|
- mov r8, QWORD PTR [rsp+784]
|
|
|
- mov rdx, QWORD PTR [rsp+776]
|
|
|
- mov rcx, QWORD PTR [rsp+768]
|
|
|
- call sp_2048_mul_16
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+784]
|
|
|
- mov rdx, QWORD PTR [rsp+776]
|
|
|
- mov rcx, QWORD PTR [rsp+768]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+792]
|
|
|
- mov rdi, QWORD PTR [rsp+800]
|
|
|
- mov rsi, QWORD PTR [rsp+768]
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+512]
|
|
|
- lea r13, QWORD PTR [rsp+640]
|
|
|
- and r11, rdi
|
|
|
- neg r15
|
|
|
- neg rdi
|
|
|
- add rsi, 256
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- mov QWORD PTR [r13], r9
|
|
|
- mov rax, QWORD PTR [r12+8]
|
|
|
- mov r9, QWORD PTR [r13+8]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+8], rax
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- mov rax, QWORD PTR [r12+16]
|
|
|
- mov r9, QWORD PTR [r13+16]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+16], rax
|
|
|
- mov QWORD PTR [r13+16], r9
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- mov QWORD PTR [r13+24], r9
|
|
|
- mov rax, QWORD PTR [r12+32]
|
|
|
- mov r9, QWORD PTR [r13+32]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+32], rax
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- mov rax, QWORD PTR [r12+40]
|
|
|
- mov r9, QWORD PTR [r13+40]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+40], rax
|
|
|
- mov QWORD PTR [r13+40], r9
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- mov QWORD PTR [r13+48], r9
|
|
|
- mov rax, QWORD PTR [r12+56]
|
|
|
- mov r9, QWORD PTR [r13+56]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+56], rax
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- mov rax, QWORD PTR [r12+64]
|
|
|
- mov r9, QWORD PTR [r13+64]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+64], rax
|
|
|
- mov QWORD PTR [r13+64], r9
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- mov QWORD PTR [r13+72], r9
|
|
|
- mov rax, QWORD PTR [r12+80]
|
|
|
- mov r9, QWORD PTR [r13+80]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+80], rax
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- mov rax, QWORD PTR [r12+88]
|
|
|
- mov r9, QWORD PTR [r13+88]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+88], rax
|
|
|
- mov QWORD PTR [r13+88], r9
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [r13+96]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- mov QWORD PTR [r13+96], r9
|
|
|
- mov rax, QWORD PTR [r12+104]
|
|
|
- mov r9, QWORD PTR [r13+104]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+104], rax
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- mov rax, QWORD PTR [r12+112]
|
|
|
- mov r9, QWORD PTR [r13+112]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+112], rax
|
|
|
- mov QWORD PTR [r13+112], r9
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [r13+120]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- mov QWORD PTR [r13+120], r9
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r11, 0
|
|
|
- lea r13, QWORD PTR [rsp+256]
|
|
|
- mov r12, rsp
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [r13+248]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r11, 0
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [rcx+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [rcx+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [rcx+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [rcx+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [rcx+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [rcx+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [rcx+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r11, 0
|
|
|
- sub rsi, 128
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r12+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r12+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r12+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r12+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+384], r11
|
|
|
- add rsi, 128
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- ; Add to zero
|
|
|
- mov rax, QWORD PTR [r13+136]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+144]
|
|
|
- mov QWORD PTR [rsi+136], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+152]
|
|
|
- mov QWORD PTR [rsi+144], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+160]
|
|
|
- mov QWORD PTR [rsi+152], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+168]
|
|
|
- mov QWORD PTR [rsi+160], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+176]
|
|
|
- mov QWORD PTR [rsi+168], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+184]
|
|
|
- mov QWORD PTR [rsi+176], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+192]
|
|
|
- mov QWORD PTR [rsi+184], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+200]
|
|
|
- mov QWORD PTR [rsi+192], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+208]
|
|
|
- mov QWORD PTR [rsi+200], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+216]
|
|
|
- mov QWORD PTR [rsi+208], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+224]
|
|
|
- mov QWORD PTR [rsi+216], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+232]
|
|
|
- mov QWORD PTR [rsi+224], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+240]
|
|
|
- mov QWORD PTR [rsi+232], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+248]
|
|
|
- mov QWORD PTR [rsi+240], r9
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsi+248], r10
|
|
|
- add rsp, 808
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_mul_32 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; * Karatsuba-style split: each operand is handled as two 128-byte halves and three sp_2048_mul_avx2_16 products are combined.
|
|
|
-; * r A single precision integer. Result pointer, received in rcx.
|
|
|
-; * a A single precision integer. First operand pointer, received in rdx.
|
|
|
-; * b A single precision integer. Second operand pointer, received in r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mul_avx2_32 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 808 ; frame: [0..255] and [256..511] product buffers, [512..639] and [640..767] half sums, [768..807] saved args and carries
|
|
|
- mov QWORD PTR [rsp+768], rcx ; save r
|
|
|
- mov QWORD PTR [rsp+776], rdx ; save a
|
|
|
- mov QWORD PTR [rsp+784], r8 ; save b
|
|
|
- lea r12, QWORD PTR [rsp+512] ; r12 -> buffer for the sum of the two halves of a
|
|
|
- lea r14, QWORD PTR [rdx+128] ; r14 -> upper half of a
|
|
|
- ; Add: [r12] = lower half of a + upper half of a (16 words), carry-out collected in r15
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15 ; r15 = 0, receives the carry-out after the chain
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- adc r15, 0 ; r15 = carry out of the 16-word sum (0 or 1)
|
|
|
- mov QWORD PTR [rsp+792], r15 ; keep that carry in the frame across the calls below
|
|
|
- lea r13, QWORD PTR [rsp+640] ; r13 -> buffer for the sum of the two halves of b
|
|
|
- lea r14, QWORD PTR [r8+128] ; r14 -> upper half of b
|
|
|
- ; Add: [r13] = lower half of b + upper half of b (16 words), carry-out collected in rdi
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi ; rdi = 0, receives the carry-out after the chain
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [r8+104]
|
|
|
- mov QWORD PTR [r13+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [r13+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov QWORD PTR [r13+120], rax
|
|
|
- adc rdi, 0 ; rdi = carry out of the 16-word sum (0 or 1)
|
|
|
- mov QWORD PTR [rsp+800], rdi ; keep that carry in the frame across the calls below
|
|
|
- mov r8, r13 ; call 1 operands: the two half sums
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp ; call 1 destination: buffer at [rsp]
|
|
|
- call sp_2048_mul_avx2_16 ; presumably [rsp] = (half sum of a) * (half sum of b); callee is defined outside this block
|
|
|
- mov r8, QWORD PTR [rsp+784] ; reload b
|
|
|
- mov rdx, QWORD PTR [rsp+776] ; reload a
|
|
|
- lea rcx, QWORD PTR [rsp+256] ; call 2 destination: buffer at [rsp+256]
|
|
|
- add r8, 128 ; upper half of b
|
|
|
- add rdx, 128 ; upper half of a
|
|
|
- call sp_2048_mul_avx2_16 ; presumably [rsp+256] = upper half of a * upper half of b
|
|
|
- mov r8, QWORD PTR [rsp+784] ; call 3 operands: lower halves of b and a
|
|
|
- mov rdx, QWORD PTR [rsp+776]
|
|
|
- mov rcx, QWORD PTR [rsp+768] ; call 3 destination: r itself
|
|
|
- call sp_2048_mul_avx2_16 ; presumably low part of r = lower half of a * lower half of b
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+784] ; reload the argument registers after the call; rcx is used as the base of r below
|
|
|
- mov rdx, QWORD PTR [rsp+776]
|
|
|
- mov rcx, QWORD PTR [rsp+768]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+792] ; r15 = carry of the a half sum
|
|
|
- mov rdi, QWORD PTR [rsp+800] ; rdi = carry of the b half sum
|
|
|
- mov rsi, QWORD PTR [rsp+768] ; rsi = r
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+512] ; r12 -> half sum of a
|
|
|
- lea r13, QWORD PTR [rsp+640] ; r13 -> half sum of b
|
|
|
- and r11, rdi ; r11 = 1 only when both half sums carried
|
|
|
- neg r15 ; 0/1 carry -> 0 / all-ones mask
|
|
|
- neg rdi ; 0/1 carry -> 0 / all-ones mask
|
|
|
- add rsi, 256 ; rsi = r + 256 bytes
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- pext rax, rax, rdi ; pext with a 0 / all-ones mask gives 0 or the unchanged word: keep the a-sum word only if the b sum carried
|
|
|
- pext r9, r9, r15 ; keep the b-sum word only if the a sum carried
|
|
|
- add rax, r9 ; start of a 16-word carry chain; results go to [rsi..rsi+120]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [r13+8]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [r13+16]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [r13+32]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [r13+40]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [r13+56]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [r13+64]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [r13+80]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [r13+88]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [r13+96]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [r13+104]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [r13+112]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [r13+120]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, r9
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r11, 0 ; fold the carry of the masked sum into r11
|
|
|
- lea r13, QWORD PTR [rsp+256] ; r13 -> product written by call 2
|
|
|
- mov r12, rsp ; r12 -> product written by call 1
|
|
|
- mov rax, QWORD PTR [r12] ; 32-word in-place subtract: [r12] -= [r13]
|
|
|
- sub rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [r13+248]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r11, 0 ; propagate the final borrow into r11
|
|
|
- mov rax, QWORD PTR [r12] ; second 32-word in-place subtract: [r12] -= [rcx] (first 32 words of r, where call 3 wrote)
|
|
|
- sub rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [rcx+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [rcx+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [rcx+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [rcx+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [rcx+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [rcx+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [rcx+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r11, 0 ; propagate the final borrow into r11
|
|
|
- sub rsi, 128 ; rsi = r + 128 bytes
|
|
|
- ; Add: 32 words at r + 128 bytes += the 32 words at [r12], carry-out added to r11
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r12+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r12+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r12+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r12+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r11, 0 ; fold the carry-out of the 32-word add into r11
|
|
|
- mov QWORD PTR [rcx+384], r11 ; store r11 as the word at r + 384 bytes (word 48)
|
|
|
- add rsi, 128 ; rsi = r + 256 bytes
|
|
|
- ; Add: 17 words at r + 256 bytes += first 17 words of [r13] (product from call 2)
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- ; Add to zero: remaining 15 words of [r13] plus the running carry are written to [rsi+136..rsi+248]; these destination words are not read first
|
|
|
- mov rax, QWORD PTR [r13+136]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+144]
|
|
|
- mov QWORD PTR [rsi+136], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+152]
|
|
|
- mov QWORD PTR [rsi+144], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+160]
|
|
|
- mov QWORD PTR [rsi+152], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+168]
|
|
|
- mov QWORD PTR [rsi+160], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+176]
|
|
|
- mov QWORD PTR [rsi+168], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+184]
|
|
|
- mov QWORD PTR [rsi+176], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+192]
|
|
|
- mov QWORD PTR [rsi+184], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+200]
|
|
|
- mov QWORD PTR [rsi+192], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+208]
|
|
|
- mov QWORD PTR [rsi+200], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+216]
|
|
|
- mov QWORD PTR [rsi+208], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+224]
|
|
|
- mov QWORD PTR [rsi+216], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+232]
|
|
|
- mov QWORD PTR [rsi+224], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+240]
|
|
|
- mov QWORD PTR [rsi+232], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+248]
|
|
|
- mov QWORD PTR [rsi+240], r9
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsi+248], r10
|
|
|
- add rsp, 808 ; release the scratch frame
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret ; no return value is set up; the result is the memory written through r
|
|
|
-sp_2048_mul_avx2_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_sqr_16 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- mov r8, rdx
|
|
|
- sub rsp, 128
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax
|
|
|
- xor r11, r11
|
|
|
- mov QWORD PTR [rsp], rax
|
|
|
- mov r10, rdx
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+8], r10
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+16], r11
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+24], r9
|
|
|
- ; A[0] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- ; A[0] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- ; A[0] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+48], r9
|
|
|
- ; A[0] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+56], r10
|
|
|
- ; A[0] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+64], r11
|
|
|
- ; A[0] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+72], r9
|
|
|
- ; A[0] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- ; A[0] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- ; A[0] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+96], r9
|
|
|
- ; A[0] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+104], r10
|
|
|
- ; A[0] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+112], r11
|
|
|
- ; A[0] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+120], r9
|
|
|
- ; A[1] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[2] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+128], r10
|
|
|
- ; A[2] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[3] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+136], r11
|
|
|
- ; A[3] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[4] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- ; A[4] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[5] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- ; A[5] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[6] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+160], r11
|
|
|
- ; A[6] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[7] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+168], r9
|
|
|
- ; A[7] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[8] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[11] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+176], r10
|
|
|
- ; A[8] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[9] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[11] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+184], r11
|
|
|
- ; A[9] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[10] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[11] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[12] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+192], r9
|
|
|
- ; A[10] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[11] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[12] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+96]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+200], r10
|
|
|
- ; A[11] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+96]
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+208], r11
|
|
|
- ; A[12] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+96]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+104]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+216], r9
|
|
|
- ; A[13] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+104]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[14] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rcx+224], r10
|
|
|
- ; A[14] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+112]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+232], r11
|
|
|
- ; A[15] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul rax
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- mov QWORD PTR [rcx+240], r9
|
|
|
- mov QWORD PTR [rcx+248], r10
|
|
|
- mov rax, QWORD PTR [rsp]
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r12, QWORD PTR [rsp+16]
|
|
|
- mov r13, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], r13
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r12, QWORD PTR [rsp+48]
|
|
|
- mov r13, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r12
|
|
|
- mov QWORD PTR [rcx+56], r13
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov rdx, QWORD PTR [rsp+72]
|
|
|
- mov r12, QWORD PTR [rsp+80]
|
|
|
- mov r13, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], rdx
|
|
|
- mov QWORD PTR [rcx+80], r12
|
|
|
- mov QWORD PTR [rcx+88], r13
|
|
|
- mov rax, QWORD PTR [rsp+96]
|
|
|
- mov rdx, QWORD PTR [rsp+104]
|
|
|
- mov r12, QWORD PTR [rsp+112]
|
|
|
- mov r13, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], rdx
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- mov QWORD PTR [rcx+120], r13
|
|
|
- add rsp, 128
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_sqr_16 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_sqr_avx2_16 PROC
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov r8, rcx
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 128
|
|
|
- cmp r9, r8
|
|
|
- mov rbp, rsp
|
|
|
- cmovne rbp, r8
|
|
|
- add r8, 128
|
|
|
- xor r13, r13
|
|
|
- ; Diagonal 1
|
|
|
- ; Zero into %r9
|
|
|
- ; Zero into %r10
|
|
|
- ; A[1] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx r11, r10, QWORD PTR [r9+8]
|
|
|
- ; A[2] x A[0]
|
|
|
- mulx r12, rax, QWORD PTR [r9+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [rbp+8], r10
|
|
|
- mov QWORD PTR [rbp+16], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[3] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- ; A[4] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r13
|
|
|
- mov QWORD PTR [rbp+24], r12
|
|
|
- mov QWORD PTR [rbp+32], r10
|
|
|
- ; Zero into %r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[5] x A[0]
|
|
|
- mulx r12, rax, QWORD PTR [r9+40]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- ; A[6] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [rbp+40], r11
|
|
|
- mov QWORD PTR [rbp+48], r12
|
|
|
- ; Zero into %r9
|
|
|
- ; Zero into %r10
|
|
|
- ; A[7] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r13
|
|
|
- ; A[8] x A[0]
|
|
|
- mulx r12, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [rbp+56], r10
|
|
|
- mov QWORD PTR [rbp+64], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[9] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- ; A[10] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r13
|
|
|
- mov QWORD PTR [rbp+72], r12
|
|
|
- mov QWORD PTR [rbp+80], r10
|
|
|
- ; No load %r13 - %r10
|
|
|
- ; A[11] x A[0]
|
|
|
- mulx r15, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r15, r13
|
|
|
- ; A[12] x A[0]
|
|
|
- mulx rdi, rax, QWORD PTR [r9+96]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, r13
|
|
|
- mov QWORD PTR [rbp+88], r11
|
|
|
- ; No store %r13 - %r10
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[13] x A[0]
|
|
|
- mulx rsi, rax, QWORD PTR [r9+104]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, r13
|
|
|
- ; A[14] x A[0]
|
|
|
- mulx rbx, rax, QWORD PTR [r9+112]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, r13
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No store %r15 - %r9
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[15] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+120]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, r13
|
|
|
- ; No store %rbx - %r10
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- ; Diagonal 2
|
|
|
- mov r10, QWORD PTR [rbp+24]
|
|
|
- mov r11, QWORD PTR [rbp+32]
|
|
|
- mov r12, QWORD PTR [rbp+40]
|
|
|
- ; A[2] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+16]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+24], r10
|
|
|
- mov QWORD PTR [rbp+32], r11
|
|
|
- mov r10, QWORD PTR [rbp+48]
|
|
|
- mov r11, QWORD PTR [rbp+56]
|
|
|
- ; A[4] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+40], r12
|
|
|
- mov QWORD PTR [rbp+48], r10
|
|
|
- mov r12, QWORD PTR [rbp+64]
|
|
|
- mov r10, QWORD PTR [rbp+72]
|
|
|
- ; A[6] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbp+56], r11
|
|
|
- mov QWORD PTR [rbp+64], r12
|
|
|
- mov r11, QWORD PTR [rbp+80]
|
|
|
- mov r12, QWORD PTR [rbp+88]
|
|
|
- ; A[8] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+72], r10
|
|
|
- mov QWORD PTR [rbp+80], r11
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[10] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[11] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r12
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r10
|
|
|
- ; A[12] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[13] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No store %r15 - %r10
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[14] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] x A[1]
|
|
|
- mulx r12, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[15] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx r10, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- ; Diagonal 3
|
|
|
- mov r10, QWORD PTR [rbp+40]
|
|
|
- mov r11, QWORD PTR [rbp+48]
|
|
|
- mov r12, QWORD PTR [rbp+56]
|
|
|
- ; A[3] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+40], r10
|
|
|
- mov QWORD PTR [rbp+48], r11
|
|
|
- mov r10, QWORD PTR [rbp+64]
|
|
|
- mov r11, QWORD PTR [rbp+72]
|
|
|
- ; A[5] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+56], r12
|
|
|
- mov QWORD PTR [rbp+64], r10
|
|
|
- mov r12, QWORD PTR [rbp+80]
|
|
|
- mov r10, QWORD PTR [rbp+88]
|
|
|
- ; A[7] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbp+72], r11
|
|
|
- mov QWORD PTR [rbp+80], r12
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[9] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r10, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[10] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r10
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[11] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[12] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r10
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[13] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx rbx, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov QWORD PTR [r8], r12
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[14] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx r12, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[14] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx r10, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+24], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- ; Diagonal 4
|
|
|
- mov r10, QWORD PTR [rbp+56]
|
|
|
- mov r11, QWORD PTR [rbp+64]
|
|
|
- mov r12, QWORD PTR [rbp+72]
|
|
|
- ; A[4] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+56], r10
|
|
|
- mov QWORD PTR [rbp+64], r11
|
|
|
- mov r10, QWORD PTR [rbp+80]
|
|
|
- mov r11, QWORD PTR [rbp+88]
|
|
|
- ; A[6] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+72], r12
|
|
|
- mov QWORD PTR [rbp+80], r10
|
|
|
- ; No load %r13 - %r10
|
|
|
- ; A[8] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[9] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r11
|
|
|
- ; No store %r13 - %r10
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[10] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[11] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[12] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[13] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r10
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[13] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[13] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx r12, rax, QWORD PTR [r9+104]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[13] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx r10, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- ; Diagonal 5
|
|
|
- mov r10, QWORD PTR [rbp+72]
|
|
|
- mov r11, QWORD PTR [rbp+80]
|
|
|
- mov r12, QWORD PTR [rbp+88]
|
|
|
- ; A[5] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+72], r10
|
|
|
- mov QWORD PTR [rbp+80], r11
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[7] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[8] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r12
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r10
|
|
|
- ; A[9] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[10] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No store %r15 - %r10
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[11] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[12] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[12] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- ; A[12] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov QWORD PTR [r8+32], r12
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[12] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx r12, rax, QWORD PTR [r9+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- mov QWORD PTR [r8+48], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[12] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx r10, rax, QWORD PTR [r9+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+56], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- ; Diagonal 6
|
|
|
- mov r10, QWORD PTR [rbp+88]
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[6] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[7] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r10
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[8] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[9] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r10
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[10] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rbx, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov QWORD PTR [r8], r12
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [r8+24]
|
|
|
- ; A[11] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[11] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r12
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [r8+56]
|
|
|
- ; A[11] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r11, QWORD PTR [r8+64]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[13] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx r12, rax, QWORD PTR [r9+104]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+56], r10
|
|
|
- mov QWORD PTR [r8+64], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[13] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx r10, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+72], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+80], r10
|
|
|
- ; Diagonal 7
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[7] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[8] x A[6]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[9] x A[6]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] x A[6]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r10
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[10] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[10] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[14] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov r12, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[14] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov QWORD PTR [r8+64], r12
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[14] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx r12, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[14] x A[13]
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mulx r10, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+96], r10
|
|
|
- ; Diagonal 8
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[8] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] x A[7]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[9] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- ; A[15] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov QWORD PTR [r8+32], r12
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- mov r12, QWORD PTR [r8+56]
|
|
|
- ; A[15] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- mov QWORD PTR [r8+48], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- ; A[15] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+56], r12
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- mov r12, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- ; A[15] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+72], r11
|
|
|
- mov QWORD PTR [r8+80], r12
|
|
|
- mov r11, QWORD PTR [r8+96]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[15] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] x A[13]
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mulx r12, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+88], r10
|
|
|
- mov QWORD PTR [r8+96], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[15] x A[14]
|
|
|
- mov rdx, QWORD PTR [r9+112]
|
|
|
- mulx r10, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+104], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov QWORD PTR [r8+120], r14
|
|
|
- ; Double and Add in A[i] x A[i]
|
|
|
- mov r11, QWORD PTR [rbp+8]
|
|
|
- ; A[0] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- mov QWORD PTR [rbp], rax
|
|
|
- adox r11, r11
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+8], r11
|
|
|
- mov r10, QWORD PTR [rbp+16]
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- ; A[1] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+16], r10
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- mov r10, QWORD PTR [rbp+32]
|
|
|
- mov r11, QWORD PTR [rbp+40]
|
|
|
- ; A[2] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+32], r10
|
|
|
- mov QWORD PTR [rbp+40], r11
|
|
|
- mov r10, QWORD PTR [rbp+48]
|
|
|
- mov r11, QWORD PTR [rbp+56]
|
|
|
- ; A[3] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+48], r10
|
|
|
- mov QWORD PTR [rbp+56], r11
|
|
|
- mov r10, QWORD PTR [rbp+64]
|
|
|
- mov r11, QWORD PTR [rbp+72]
|
|
|
- ; A[4] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+64], r10
|
|
|
- mov QWORD PTR [rbp+72], r11
|
|
|
- mov r10, QWORD PTR [rbp+80]
|
|
|
- mov r11, QWORD PTR [rbp+88]
|
|
|
- ; A[5] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+80], r10
|
|
|
- mov QWORD PTR [rbp+88], r11
|
|
|
- ; A[6] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r15, r15
|
|
|
- adox rdi, rdi
|
|
|
- adcx r15, rax
|
|
|
- adcx rdi, rcx
|
|
|
- ; A[7] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox rsi, rsi
|
|
|
- adox rbx, rbx
|
|
|
- adcx rsi, rax
|
|
|
- adcx rbx, rcx
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[8] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[9] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[10] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[11] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- ; A[12] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- mov QWORD PTR [r8+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- ; A[13] x A[13]
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+80], r10
|
|
|
- mov QWORD PTR [r8+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- ; A[14] x A[14]
|
|
|
- mov rdx, QWORD PTR [r9+112]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+96], r10
|
|
|
- mov QWORD PTR [r8+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- ; A[15] x A[15]
|
|
|
- mov rdx, QWORD PTR [r9+120]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov QWORD PTR [r8+120], r11
|
|
|
- mov QWORD PTR [r8+-32], r15
|
|
|
- mov QWORD PTR [r8+-24], rdi
|
|
|
- mov QWORD PTR [r8+-16], rsi
|
|
|
- mov QWORD PTR [r8+-8], rbx
|
|
|
- sub r8, 128
|
|
|
- cmp r9, r8
|
|
|
- jne L_end_2048_sqr_avx2_16
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp]
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+32]
|
|
|
- vmovups OWORD PTR [r8+32], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+48]
|
|
|
- vmovups OWORD PTR [r8+48], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+64]
|
|
|
- vmovups OWORD PTR [r8+64], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+80]
|
|
|
- vmovups OWORD PTR [r8+80], xmm0
|
|
|
-L_end_2048_sqr_avx2_16:
|
|
|
- add rsp, 128
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- ret
|
|
|
-sp_2048_sqr_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * Karatsuba: ah^2, al^2, (al - ah)^2 (al = low 16 words of a, ah = high 16 words; each squared with sp_2048_sqr_16)
|
|
|
-; *
|
|
|
-; * r A single precision integer that receives the result.
|
|
|
-; * a A single precision integer to square.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_sqr_32 PROC
|
|
|
- sub rsp, 272
|
|
|
- mov QWORD PTR [rsp+256], rcx
|
|
|
- mov QWORD PTR [rsp+264], rdx
|
|
|
- mov r9, 0
|
|
|
- mov r10, rsp
|
|
|
- lea r11, QWORD PTR [rdx+128]
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- sub rax, QWORD PTR [r11]
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- sbb r8, QWORD PTR [r11+8]
|
|
|
- mov rax, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- sbb rax, QWORD PTR [r11+16]
|
|
|
- mov r8, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- sbb r8, QWORD PTR [r11+24]
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- sbb rax, QWORD PTR [r11+32]
|
|
|
- mov r8, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- sbb r8, QWORD PTR [r11+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- sbb rax, QWORD PTR [r11+48]
|
|
|
- mov r8, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- sbb r8, QWORD PTR [r11+56]
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- sbb rax, QWORD PTR [r11+64]
|
|
|
- mov r8, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- sbb r8, QWORD PTR [r11+72]
|
|
|
- mov rax, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- sbb rax, QWORD PTR [r11+80]
|
|
|
- mov r8, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- sbb r8, QWORD PTR [r11+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- sbb rax, QWORD PTR [r11+96]
|
|
|
- mov r8, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- sbb r8, QWORD PTR [r11+104]
|
|
|
- mov rax, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- sbb rax, QWORD PTR [r11+112]
|
|
|
- mov r8, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- sbb r8, QWORD PTR [r11+120]
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- sbb r9, 0
|
|
|
- ; Cond Negate
|
|
|
- mov rax, QWORD PTR [r10]
|
|
|
- mov r11, r9
|
|
|
- xor rax, r9
|
|
|
- neg r11
|
|
|
- sub rax, r9
|
|
|
- mov r8, QWORD PTR [r10+8]
|
|
|
- sbb r11, 0
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+16]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+24]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+32]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+40]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+56]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+64]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+72]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+80]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+88]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+104]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+112]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+120]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- mov rdx, r10
|
|
|
- mov rcx, rsp
|
|
|
- call sp_2048_sqr_16
|
|
|
- mov rdx, QWORD PTR [rsp+264]
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- add rdx, 128
|
|
|
- add rcx, 256
|
|
|
- call sp_2048_sqr_16
|
|
|
- mov rdx, QWORD PTR [rsp+264]
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- call sp_2048_sqr_16
|
|
|
-IFDEF _WIN64
|
|
|
- mov rdx, QWORD PTR [rsp+264]
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
-ENDIF
|
|
|
- mov rdx, QWORD PTR [rsp+256]
|
|
|
- lea r10, QWORD PTR [rsp+128]
|
|
|
- add rdx, 384
|
|
|
- mov r9, 0
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- sub r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r9, 0
|
|
|
- sub rdx, 256
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- sub r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- neg r9
|
|
|
- add rcx, 256
|
|
|
- mov r8, QWORD PTR [rcx+-128]
|
|
|
- sub r8, QWORD PTR [r10+-128]
|
|
|
- mov rax, QWORD PTR [rcx+-120]
|
|
|
- mov QWORD PTR [rcx+-128], r8
|
|
|
- sbb rax, QWORD PTR [r10+-120]
|
|
|
- mov r8, QWORD PTR [rcx+-112]
|
|
|
- mov QWORD PTR [rcx+-120], rax
|
|
|
- sbb r8, QWORD PTR [r10+-112]
|
|
|
- mov rax, QWORD PTR [rcx+-104]
|
|
|
- mov QWORD PTR [rcx+-112], r8
|
|
|
- sbb rax, QWORD PTR [r10+-104]
|
|
|
- mov r8, QWORD PTR [rcx+-96]
|
|
|
- mov QWORD PTR [rcx+-104], rax
|
|
|
- sbb r8, QWORD PTR [r10+-96]
|
|
|
- mov rax, QWORD PTR [rcx+-88]
|
|
|
- mov QWORD PTR [rcx+-96], r8
|
|
|
- sbb rax, QWORD PTR [r10+-88]
|
|
|
- mov r8, QWORD PTR [rcx+-80]
|
|
|
- mov QWORD PTR [rcx+-88], rax
|
|
|
- sbb r8, QWORD PTR [r10+-80]
|
|
|
- mov rax, QWORD PTR [rcx+-72]
|
|
|
- mov QWORD PTR [rcx+-80], r8
|
|
|
- sbb rax, QWORD PTR [r10+-72]
|
|
|
- mov r8, QWORD PTR [rcx+-64]
|
|
|
- mov QWORD PTR [rcx+-72], rax
|
|
|
- sbb r8, QWORD PTR [r10+-64]
|
|
|
- mov rax, QWORD PTR [rcx+-56]
|
|
|
- mov QWORD PTR [rcx+-64], r8
|
|
|
- sbb rax, QWORD PTR [r10+-56]
|
|
|
- mov r8, QWORD PTR [rcx+-48]
|
|
|
- mov QWORD PTR [rcx+-56], rax
|
|
|
- sbb r8, QWORD PTR [r10+-48]
|
|
|
- mov rax, QWORD PTR [rcx+-40]
|
|
|
- mov QWORD PTR [rcx+-48], r8
|
|
|
- sbb rax, QWORD PTR [r10+-40]
|
|
|
- mov r8, QWORD PTR [rcx+-32]
|
|
|
- mov QWORD PTR [rcx+-40], rax
|
|
|
- sbb r8, QWORD PTR [r10+-32]
|
|
|
- mov rax, QWORD PTR [rcx+-24]
|
|
|
- mov QWORD PTR [rcx+-32], r8
|
|
|
- sbb rax, QWORD PTR [r10+-24]
|
|
|
- mov r8, QWORD PTR [rcx+-16]
|
|
|
- mov QWORD PTR [rcx+-24], rax
|
|
|
- sbb r8, QWORD PTR [r10+-16]
|
|
|
- mov rax, QWORD PTR [rcx+-8]
|
|
|
- mov QWORD PTR [rcx+-16], r8
|
|
|
- sbb rax, QWORD PTR [r10+-8]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- mov QWORD PTR [rcx+-8], rax
|
|
|
- sbb r8, QWORD PTR [r10]
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- sbb rax, QWORD PTR [r10+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- sbb r8, QWORD PTR [r10+16]
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb rax, QWORD PTR [r10+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- sbb r8, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb rax, QWORD PTR [r10+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- sbb r8, QWORD PTR [r10+48]
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb rax, QWORD PTR [r10+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- sbb r8, QWORD PTR [r10+64]
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb rax, QWORD PTR [r10+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- sbb r8, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb rax, QWORD PTR [r10+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- sbb r8, QWORD PTR [r10+96]
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb rax, QWORD PTR [r10+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- sbb r8, QWORD PTR [r10+112]
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- add rcx, 384
|
|
|
- ; Add in word
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- add r8, r9
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- mov rdx, QWORD PTR [rsp+264]
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- add rsp, 272
|
|
|
- ret
|
|
|
-sp_2048_sqr_32 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; * a is read as 32 64-bit words: al = low 16 words, ah = high 16 words. r words 0..63 are written.
|
|
|
-; * Karatsuba: ah^2, al^2, (al - ah)^2 -- the middle term 2*al*ah is formed as al^2 + ah^2 - (al - ah)^2.
|
|
|
-; * Uses 272 bytes of stack (256-byte scratch + saved r and a pointers) and calls sp_2048_sqr_avx2_16 three times.
|
|
|
-; * r A single precision integer. Result pointer, received in rcx.
|
|
|
-; * a A single precision integer. Operand pointer, received in rdx.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_sqr_avx2_32 PROC
|
|
|
- sub rsp, 272 ; frame: scratch at [rsp..rsp+255], saved r at [rsp+256], saved a at [rsp+264]
|
|
|
- mov QWORD PTR [rsp+256], rcx
|
|
|
- mov QWORD PTR [rsp+264], rdx
|
|
|
- mov r9, 0 ; r9 becomes the borrow mask of al - ah
|
|
|
- mov r10, rsp ; r10 = scratch
|
|
|
- lea r11, QWORD PTR [rdx+128] ; r11 = ah (a + 16 words)
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- sub rax, QWORD PTR [r11] ; scratch[0..15] = al - ah; the interleaved mov instructions leave the borrow flag intact
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- sbb r8, QWORD PTR [r11+8]
|
|
|
- mov rax, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- sbb rax, QWORD PTR [r11+16]
|
|
|
- mov r8, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- sbb r8, QWORD PTR [r11+24]
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- sbb rax, QWORD PTR [r11+32]
|
|
|
- mov r8, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- sbb r8, QWORD PTR [r11+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- sbb rax, QWORD PTR [r11+48]
|
|
|
- mov r8, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- sbb r8, QWORD PTR [r11+56]
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- sbb rax, QWORD PTR [r11+64]
|
|
|
- mov r8, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- sbb r8, QWORD PTR [r11+72]
|
|
|
- mov rax, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- sbb rax, QWORD PTR [r11+80]
|
|
|
- mov r8, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- sbb r8, QWORD PTR [r11+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- sbb rax, QWORD PTR [r11+96]
|
|
|
- mov r8, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- sbb r8, QWORD PTR [r11+104]
|
|
|
- mov rax, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- sbb rax, QWORD PTR [r11+112]
|
|
|
- mov r8, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- sbb r8, QWORD PTR [r11+120]
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- sbb r9, 0 ; r9 = 0 if there was no final borrow, all ones if al < ah
|
|
|
- ; Cond Negate: when r9 is all ones, replace scratch[0..15] by its two's complement (xor with r9, then +1 with the carry tracked by hand in r11); when r9 is 0 the words are rewritten unchanged
|
|
|
- mov rax, QWORD PTR [r10]
|
|
|
- mov r11, r9
|
|
|
- xor rax, r9
|
|
|
- neg r11 ; r11 = 1 when negating, 0 otherwise
|
|
|
- sub rax, r9 ; subtracting the mask adds 1 when negating, adds 0 otherwise
|
|
|
- mov r8, QWORD PTR [r10+8]
|
|
|
- sbb r11, 0 ; r11 = carry out of word 0 of that "+1" (0 or 1)
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+16]
|
|
|
- setc r11b ; r11 = carry into the next word; the upper bits of r11 are already zero
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+24]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+32]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+40]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+56]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+64]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+72]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+80]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+88]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+104]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+112]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+120]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- mov rdx, r10 ; input = scratch (16 words)
|
|
|
- mov rcx, rsp ; output = scratch, i.e. squared in place
|
|
|
- call sp_2048_sqr_avx2_16 ; callee is defined elsewhere; presumably leaves the 32-word square (al - ah)^2 in scratch -- the code below reads 32 scratch words
|
|
|
- mov rdx, QWORD PTR [rsp+264]
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- add rdx, 128 ; rdx = ah
|
|
|
- add rcx, 256 ; rcx = r + 32 words
|
|
|
- call sp_2048_sqr_avx2_16 ; upper half of r = ah^2 (presumably 32 words)
|
|
|
- mov rdx, QWORD PTR [rsp+264]
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- call sp_2048_sqr_avx2_16 ; lower half of r = al^2 (presumably 32 words)
|
|
|
-IFDEF _WIN64
|
|
|
- mov rdx, QWORD PTR [rsp+264] ; reload of a after the call; rdx is overwritten again just below
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
-ENDIF
|
|
|
- mov rdx, QWORD PTR [rsp+256] ; rdx = r
|
|
|
- lea r10, QWORD PTR [rsp+128] ; r10 = scratch + 128, so offsets -128..120 cover all 32 scratch words
|
|
|
- add rdx, 384 ; rdx = r + 384: offsets -128..120 address r words 32..63
|
|
|
- mov r9, 0 ; r9 accumulates the borrows of the next two subtractions
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- sub r8, QWORD PTR [rdx+-128] ; scratch -= r[32..63]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r9, 0 ; r9 = -(borrow of the first subtraction)
|
|
|
- sub rdx, 256 ; rdx = r + 128: offsets -128..120 address r words 0..31
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- sub r8, QWORD PTR [rdx+-128] ; scratch -= r[0..31]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r9, 0 ; r9 = -(total borrows of the two subtractions)
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- neg r9 ; r9 = number of borrows so far (0..2)
|
|
|
- add rcx, 256 ; rcx = r + 256: offsets -128..120 address r words 16..47
|
|
|
- mov r8, QWORD PTR [rcx+-128]
|
|
|
- sub r8, QWORD PTR [r10+-128] ; r[16..47] -= scratch
|
|
|
- mov rax, QWORD PTR [rcx+-120]
|
|
|
- mov QWORD PTR [rcx+-128], r8
|
|
|
- sbb rax, QWORD PTR [r10+-120]
|
|
|
- mov r8, QWORD PTR [rcx+-112]
|
|
|
- mov QWORD PTR [rcx+-120], rax
|
|
|
- sbb r8, QWORD PTR [r10+-112]
|
|
|
- mov rax, QWORD PTR [rcx+-104]
|
|
|
- mov QWORD PTR [rcx+-112], r8
|
|
|
- sbb rax, QWORD PTR [r10+-104]
|
|
|
- mov r8, QWORD PTR [rcx+-96]
|
|
|
- mov QWORD PTR [rcx+-104], rax
|
|
|
- sbb r8, QWORD PTR [r10+-96]
|
|
|
- mov rax, QWORD PTR [rcx+-88]
|
|
|
- mov QWORD PTR [rcx+-96], r8
|
|
|
- sbb rax, QWORD PTR [r10+-88]
|
|
|
- mov r8, QWORD PTR [rcx+-80]
|
|
|
- mov QWORD PTR [rcx+-88], rax
|
|
|
- sbb r8, QWORD PTR [r10+-80]
|
|
|
- mov rax, QWORD PTR [rcx+-72]
|
|
|
- mov QWORD PTR [rcx+-80], r8
|
|
|
- sbb rax, QWORD PTR [r10+-72]
|
|
|
- mov r8, QWORD PTR [rcx+-64]
|
|
|
- mov QWORD PTR [rcx+-72], rax
|
|
|
- sbb r8, QWORD PTR [r10+-64]
|
|
|
- mov rax, QWORD PTR [rcx+-56]
|
|
|
- mov QWORD PTR [rcx+-64], r8
|
|
|
- sbb rax, QWORD PTR [r10+-56]
|
|
|
- mov r8, QWORD PTR [rcx+-48]
|
|
|
- mov QWORD PTR [rcx+-56], rax
|
|
|
- sbb r8, QWORD PTR [r10+-48]
|
|
|
- mov rax, QWORD PTR [rcx+-40]
|
|
|
- mov QWORD PTR [rcx+-48], r8
|
|
|
- sbb rax, QWORD PTR [r10+-40]
|
|
|
- mov r8, QWORD PTR [rcx+-32]
|
|
|
- mov QWORD PTR [rcx+-40], rax
|
|
|
- sbb r8, QWORD PTR [r10+-32]
|
|
|
- mov rax, QWORD PTR [rcx+-24]
|
|
|
- mov QWORD PTR [rcx+-32], r8
|
|
|
- sbb rax, QWORD PTR [r10+-24]
|
|
|
- mov r8, QWORD PTR [rcx+-16]
|
|
|
- mov QWORD PTR [rcx+-24], rax
|
|
|
- sbb r8, QWORD PTR [r10+-16]
|
|
|
- mov rax, QWORD PTR [rcx+-8]
|
|
|
- mov QWORD PTR [rcx+-16], r8
|
|
|
- sbb rax, QWORD PTR [r10+-8]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- mov QWORD PTR [rcx+-8], rax
|
|
|
- sbb r8, QWORD PTR [r10]
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- sbb rax, QWORD PTR [r10+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- sbb r8, QWORD PTR [r10+16]
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb rax, QWORD PTR [r10+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- sbb r8, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb rax, QWORD PTR [r10+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- sbb r8, QWORD PTR [r10+48]
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb rax, QWORD PTR [r10+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- sbb r8, QWORD PTR [r10+64]
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb rax, QWORD PTR [r10+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- sbb r8, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb rax, QWORD PTR [r10+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- sbb r8, QWORD PTR [r10+96]
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb rax, QWORD PTR [r10+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- sbb r8, QWORD PTR [r10+112]
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- sbb r9, 0 ; r9 = earlier borrow count minus the borrow of this subtraction
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- add rcx, 384 ; rcx = r + 384, i.e. r word 48
|
|
|
- ; Add in word: add r9 to r[48] and ripple the carry through r[49..63]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- add r8, r9
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- mov rdx, QWORD PTR [rsp+264] ; leave rdx = a and rcx = r, as on entry
|
|
|
- mov rcx, QWORD PTR [rsp+256]
|
|
|
- add rsp, 272 ; release the frame
|
|
|
- ret
|
|
|
-sp_2048_sqr_avx2_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Sub b from a into a. (a -= b) Operates on 16 64-bit words; rax returns 0 if there was no final borrow, all ones if there was.
|
|
|
-; *
|
|
|
-; * a A single precision integer and result. Pointer received in rcx; 16 words are read and overwritten.
|
|
|
-; * b A single precision integer. Pointer received in rdx; 16 words are read.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_sub_in_place_16 PROC
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- sub r8, QWORD PTR [rdx] ; word 0 starts the borrow chain; r8 and r9 alternate as the working register
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8 ; the interleaved mov instructions do not modify flags, so the borrow survives until the next sbb
|
|
|
- sbb r9, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb r9, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb r9, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb r9, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb r9, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb r9, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb r9, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb r9, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- sbb rax, rax ; rax = 0 - borrow: 0 when no borrow, all ones when the last word borrowed
|
|
|
- ret
|
|
|
-sp_2048_sub_in_place_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Mul a by digit b into r. (r = a * b) a is 32 64-bit words; 33 words of r are written.
|
|
|
-; * Three accumulators (r10, r11, r12) rotate roles per word: current low word, carry-in word, and the freshly zeroed next word.
|
|
|
-; * r A single precision integer. Result pointer, received in rcx; words r[0]..r[32] are stored.
|
|
|
-; * a A single precision integer. Operand pointer, received in rdx; copied to r9 before the first mul.
|
|
|
-; * b A single precision digit. Received by value in r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mul_d_32 PROC
|
|
|
- push r12 ; r12 is used as an accumulator and restored before returning
|
|
|
- mov r9, rdx ; mul writes its high half to rdx, so keep the a pointer in r9
|
|
|
- ; A[0] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9] ; rdx:rax = a[0] * b
|
|
|
- mov r10, rax
|
|
|
- mov r11, rdx
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- ; A[1] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+8], r11 ; the store does not modify flags, so the carry from the add reaches the adc below
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[16] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+128]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[17] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+136]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[18] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+144]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[19] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+152]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[20] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+160]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[21] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+168]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[22] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+176]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[23] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+184]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[24] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+192]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[25] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+200]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[26] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+208]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[27] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+216]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[28] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+224]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[29] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+232]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[30] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+240]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[31] * B -- last word: no new accumulator is zeroed; the final high word in r12 is stored as r[32]
|
|
|
- mov rax, r8
|
|
|
- mul QWORD PTR [r9+248]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- mov QWORD PTR [rcx+256], r12
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_mul_d_32 ENDP
|
|
|
-_text ENDS
|
|
|
; /* Conditionally subtract b from a using the mask m: r = a - (b & m).
; * Every 64-bit word of b is ANDed with m before it is subtracted, so
; * m = -1 subtracts b and m = 0 subtracts zero (r receives a copy of a).
; *
; * r  (rcx)  Result, 16 words.
; * a  (rdx)  Number to subtract from, 16 words.
; * b  (r8)   Number to subtract, 16 words.
; * m  (r9)   Mask applied to each word of b.
; * returns (rax)  0 if the subtraction produced no borrow, -1 if it did.
; *
; * The masked copy of b is staged in a 128-byte stack buffer before the
; * first word of r is written. Scratch registers: r8, r10, r11.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_16 PROC
        sub     rsp, 128
        ; Pass 1: stack[0..15] = b[0..15] & m, two words per expansion.
        sp2048_cs16_off = 0
        REPEAT 8
        mov     r10, QWORD PTR [r8+sp2048_cs16_off]
        mov     r11, QWORD PTR [r8+sp2048_cs16_off+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp2048_cs16_off], r10
        mov     QWORD PTR [rsp+sp2048_cs16_off+8], r11
        sp2048_cs16_off = sp2048_cs16_off + 16
        ENDM
        ; Pass 2: r = a - stack. Word 0 starts the borrow chain with SUB;
        ; r8 is free to reuse as scratch because b is no longer read.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        sub     r10, r8
        ; Words 1..14 in pairs. Each result is stored one step late; the MOVs
        ; in between leave the borrow flag untouched.
        sp2048_cs16_off = 0
        REPEAT 7
        mov     r11, QWORD PTR [rdx+sp2048_cs16_off+8]
        mov     r8, QWORD PTR [rsp+sp2048_cs16_off+8]
        sbb     r11, r8
        mov     QWORD PTR [rcx+sp2048_cs16_off], r10
        mov     r10, QWORD PTR [rdx+sp2048_cs16_off+16]
        mov     r8, QWORD PTR [rsp+sp2048_cs16_off+16]
        sbb     r10, r8
        mov     QWORD PTR [rcx+sp2048_cs16_off+8], r11
        sp2048_cs16_off = sp2048_cs16_off + 16
        ENDM
        ; Word 15 and the two pending stores.
        mov     r11, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [rsp+120]
        sbb     r11, r8
        mov     QWORD PTR [rcx+112], r10
        mov     QWORD PTR [rcx+120], r11
        ; Turn the final borrow into the 0 / -1 return value.
        sbb     rax, rax
        add     rsp, 128
        ret
sp_2048_cond_sub_16 ENDP
_text ENDS
|
|
|
; /* Reduce the number back to 2048 bits using Montgomery reduction.
; *
; * a   (rcx)  Number to reduce in place. Words a[0..31] are accessed; the
; *            reduced value is written to a[0..15].
; * m   (rdx)  The modulus, 16 words.
; * mp  (r8)   The digit representing the negative inverse of m mod 2^n.
; *
; * Sixteen passes are made. Each pass computes mu = a[0] * mp (low 64 bits),
; * adds m * mu to the 16-word window at rcx, folds the carry into the word
; * above the window and into rsi, then slides the window up by one word.
; * Afterwards the window sits on the upper half of the buffer and
; * sp_2048_cond_sub_16 writes (upper half) - (m & mask) to the lower half,
; * where mask = -(carry left in rsi).
; *
; * r15 and rdi cache window words 0 and 1 across passes, so those two words
; * are not written to memory inside the loop.
; */
_text SEGMENT READONLY PARA
sp_2048_mont_reduce_16 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        mov     r9, rdx                 ; r9 = m; rdx is overwritten by every MUL
        xor     rsi, rsi                ; rsi = carry out of the top word
        ; i = 16
        mov     r10, 16
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_2048_mont_reduce_16_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu
        ; Only the carry (r12) is kept; the low-word sum in r15 is overwritten
        ; by the next step.
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu
        ; The sum lands in r15: it is word 0 of the next pass.
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu
        ; The sum lands in rdi: it is word 1 of the next pass.
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] .. a[i+14] += m[3] .. m[14] * mu
        ; Expanded two words at a time: the carry alternates between r12
        ; (into odd words) and r11 (into even words).
        sp2048_mr16_off = 24
        REPEAT 6
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+sp2048_mr16_off]
        mov     r14, QWORD PTR [rcx+sp2048_mr16_off]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+sp2048_mr16_off], r14
        adc     r11, 0
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+sp2048_mr16_off+8]
        mov     r14, QWORD PTR [rcx+sp2048_mr16_off+8]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+sp2048_mr16_off+8], r14
        adc     r12, 0
        sp2048_mr16_off = sp2048_mr16_off + 16
        ENDM
        ; a[i+15] += m[15] * mu
        mov     rax, r13
        mul     QWORD PTR [r9+120]
        mov     r14, QWORD PTR [rcx+120]
        add     r12, rax
        adc     rdx, rsi                ; fold in the previous pass's top carry
        mov     rsi, 0                  ; MOV, not XOR: the carry flag must survive
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+120], r14
        adc     QWORD PTR [rcx+128], rdx
        adc     rsi, 0
        ; i -= 1
        add     rcx, 8
        dec     r10
        jnz     L_2048_mont_reduce_16_loop
        ; Flush the two cached words; rcx now addresses the upper half.
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        neg     rsi                     ; carry 0/1 -> mask 0/-1
        ; Arguments for sp_2048_cond_sub_16, which reads r from rcx, a from rdx,
        ; b from r8 and the mask from r9. m must be copied out of r9 before r9
        ; is overwritten with the mask.
        mov     r8, r9                  ; b    = m
        mov     r9, rsi                 ; mask
        mov     rdx, rcx                ; a    = upper half
        sub     rcx, 128                ; r    = start of the buffer
        call    sp_2048_cond_sub_16
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_2048_mont_reduce_16 ENDP
_text ENDS
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Conditionally subtract b from a using the mask m.
; * Each word of b is passed through PEXT with m as the bit selector, so
; * m = -1 keeps the word unchanged and m = 0 turns it into zero.
; *
; * r  (rcx)  Result, 16 words.
; * a  (rdx)  Number to subtract from, 16 words.
; * b  (r8)   Number to subtract, 16 words.
; * m  (r9)   Mask value to apply.
; * returns (rax)  0 if the subtraction produced no borrow, -1 if it did.
; *
; * r10, r11 and r12 rotate through three roles (word of a, word of b,
; * result waiting to be stored). r12 is saved and restored here.
; */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_avx2_16 PROC
        push    r12
        ; Word 0 starts the borrow chain with SUB.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        sub     r10, r12
        ; Words 1..15, three per expansion so the register rotation closes.
        ; Each store writes the previous word's result; MOV and PEXT sit inside
        ; the SBB chain, so they must leave the carry flag alone.
        sp2048_csa16_off = 0
        REPEAT 5
        mov     r12, QWORD PTR [r8+sp2048_csa16_off+8]
        mov     r11, QWORD PTR [rdx+sp2048_csa16_off+8]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+sp2048_csa16_off], r10
        sbb     r11, r12
        mov     r10, QWORD PTR [r8+sp2048_csa16_off+16]
        mov     r12, QWORD PTR [rdx+sp2048_csa16_off+16]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+sp2048_csa16_off+8], r11
        sbb     r12, r10
        mov     r11, QWORD PTR [r8+sp2048_csa16_off+24]
        mov     r10, QWORD PTR [rdx+sp2048_csa16_off+24]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+sp2048_csa16_off+16], r12
        sbb     r10, r11
        sp2048_csa16_off = sp2048_csa16_off + 24
        ENDM
        ; Store word 15 and convert the final borrow into 0 / -1.
        mov     QWORD PTR [rcx+120], r10
        sbb     rax, rax
        pop     r12
        ret
sp_2048_cond_sub_avx2_16 ENDP
_text ENDS
ENDIF
|
|
|
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  (rcx)  Result, 17 words (r[16] receives the final high word).
; * a  (rdx)  Multiplicand, 16 words.
; * b  (r8)   Single 64-bit digit.
; *
; * r10, r11 and r12 rotate as a three-word accumulator: the register being
; * summed and stored, the register taking the high half of the product,
; * and the register cleared to catch a carry out of that. r12 is saved and
; * restored here; r9 is used as the pointer to a because MUL overwrites rdx.
; */

; One middle word: sumReg += lo(a[k] * b) and is stored to r[k];
; nextReg += hi(a[k] * b) + carry; topReg is zeroed and takes any carry out
; of nextReg. byteOff is k * 8.
SP_2048_MUL_D_16_STEP MACRO sumReg, nextReg, topReg, byteOff
        mov     rax, r8
        xor     topReg, topReg
        mul     QWORD PTR [r9+byteOff]
        add     sumReg, rax
        mov     QWORD PTR [rcx+byteOff], sumReg
        adc     nextReg, rdx
        adc     topReg, 0
ENDM

_text SEGMENT READONLY PARA
sp_2048_mul_d_16 PROC
        push    r12
        mov     r9, rdx
        ; A[0] * B seeds the accumulator: r10 = low word, r11 = high word.
        mov     rax, r8
        xor     r12, r12
        mul     QWORD PTR [r9]
        mov     r10, rax
        mov     r11, rdx
        mov     QWORD PTR [rcx], r10
        ; A[1] * B .. A[14] * B
        SP_2048_MUL_D_16_STEP r11, r12, r10, 8
        SP_2048_MUL_D_16_STEP r12, r10, r11, 16
        SP_2048_MUL_D_16_STEP r10, r11, r12, 24
        SP_2048_MUL_D_16_STEP r11, r12, r10, 32
        SP_2048_MUL_D_16_STEP r12, r10, r11, 40
        SP_2048_MUL_D_16_STEP r10, r11, r12, 48
        SP_2048_MUL_D_16_STEP r11, r12, r10, 56
        SP_2048_MUL_D_16_STEP r12, r10, r11, 64
        SP_2048_MUL_D_16_STEP r10, r11, r12, 72
        SP_2048_MUL_D_16_STEP r11, r12, r10, 80
        SP_2048_MUL_D_16_STEP r12, r10, r11, 88
        SP_2048_MUL_D_16_STEP r10, r11, r12, 96
        SP_2048_MUL_D_16_STEP r11, r12, r10, 104
        SP_2048_MUL_D_16_STEP r12, r10, r11, 112
        ; A[15] * B: the high half goes straight to r[16], no third register.
        mov     rax, r8
        mul     QWORD PTR [r9+120]
        add     r10, rax
        adc     r11, rdx
        mov     QWORD PTR [rcx+120], r10
        mov     QWORD PTR [rcx+128], r11
        pop     r12
        ret
sp_2048_mul_d_16 ENDP
_text ENDS
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
; *
; * r  (rcx)  Result, 17 words (r[16] receives the final high word).
; * a  (rdx)  Multiplicand, 16 words.
; * b  (r8)   Single 64-bit digit.
; *
; * MULX takes b implicitly from rdx, so the pointer to a is moved to rax.
; * ADCX carries the low-half additions through CF while ADOX adds the high
; * halves through OF, leaving the two chains independent. r13 holds zero.
; * r12 and r13 are saved and restored here; r9, r10 and r11 are scratch.
; */
_text SEGMENT READONLY PARA
sp_2048_mul_d_avx2_16 PROC
        push    r12
        push    r13
        mov     rax, rdx
        ; A[0] * B
        mov     rdx, r8
        xor     r13, r13                ; r13 = 0; also clears CF and OF
        mulx    r12, r11, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r11
        ; A[1] * B .. A[14] * B, two words per expansion: r12 and r11 swap
        ; between "word being completed" and "next word's starting value".
        sp2048_mda16_off = 0
        REPEAT 7
        mulx    r10, r9, QWORD PTR [rax+sp2048_mda16_off+8]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        mov     QWORD PTR [rcx+sp2048_mda16_off+8], r12
        mulx    r10, r9, QWORD PTR [rax+sp2048_mda16_off+16]
        mov     r12, r13
        adcx    r11, r9
        adox    r12, r10
        mov     QWORD PTR [rcx+sp2048_mda16_off+16], r11
        sp2048_mda16_off = sp2048_mda16_off + 16
        ENDM
        ; A[15] * B, then fold the last CF carry into the top word.
        mulx    r10, r9, QWORD PTR [rax+120]
        mov     r11, r13
        adcx    r12, r9
        adox    r11, r10
        adcx    r11, r13
        mov     QWORD PTR [rcx+120], r12
        mov     QWORD PTR [rcx+128], r11
        pop     r13
        pop     r12
        ret
sp_2048_mul_d_avx2_16 ENDP
_text ENDS
ENDIF
|
|
|
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * d1   (rcx)  The high order half of the number to divide.
; * d0   (rdx)  The low order half of the number to divide.
; * div  (r8)   The divisor.
; * returns (rax) the quotient; DIV also leaves the remainder in rdx.
; *
; * DIV raises a divide error when the quotient does not fit in 64 bits, so
; * callers need d1 < div (which also rules out div = 0).
; */
_text SEGMENT READONLY PARA
div_2048_word_asm_16 PROC
        ; DIV divides rdx:rax, so d0 goes to rax before d1 overwrites rdx.
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_2048_word_asm_16 ENDP
_text ENDS
ENDIF
|
|
|
; /* Compare a with b in constant time.
; *
; * a  (rcx)  A single precision integer, 16 words.
; * b  (rdx)  A single precision integer, 16 words.
; * return (rax) -1, 0 or 1 if a is less than, equal to or greater than b
; * respectively.
; *
; * All 16 words are visited from most to least significant with no branches.
; * r8 is a mask that stays -1 while every word seen so far is equal and is
; * cleared at the first difference; later words are ANDed with it, so they
; * compare as 0 against 0 and cannot change the result.
; * r12 is saved and restored here; r9, r10 and r11 are scratch.
; */
_text SEGMENT READONLY PARA
sp_2048_cmp_16 PROC
        push    r12
        xor     r9, r9                  ; r9  = 0, the value that clears the mask
        mov     r8, -1                  ; r8  = "still equal" mask
        mov     rax, -1                 ; rax = provisional result
        mov     r10, 1                  ; r10 = result for a > b
        sp2048_cmp16_off = 120
        REPEAT 16
        mov     r11, QWORD PTR [rcx+sp2048_cmp16_off]
        mov     r12, QWORD PTR [rdx+sp2048_cmp16_off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10                ; a word > b word: result = 1
        cmovc   rax, r8                 ; a word < b word: result = mask (-1 here)
        cmovnz  r8, r9                  ; words differ: clear the mask
        sp2048_cmp16_off = sp2048_cmp16_off - 8
        ENDM
        ; Mask still -1 means every word matched: -1 xor -1 = 0. Otherwise the
        ; mask is 0 and the result passes through unchanged.
        xor     rax, r8
        pop     r12
        ret
sp_2048_cmp_16 ENDP
_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_get_from_table_16 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- pxor xmm13, xmm13
|
|
|
- pshufd xmm11, xmm11, 0
|
|
|
- pshufd xmm10, xmm10, 0
|
|
|
- ; START: 0-7
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 0-7
|
|
|
- ; START: 8-15
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- ; END: 8-15
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_2048_get_from_table_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 2048 bits using Montgomery reduction.
|
|
|
-; * Each of the 16 steps computes mu = a[i] * mp and adds mu * m to a[i..i+16] using MULX with the ADCX (CF) and ADOX (OF) carry chains; afterwards m, masked by the final carry, is subtracted from a[16..31] and the result is written to a[0..15].
|
|
|
-; * a A single precision number to reduce in place. Passed in rcx; words a[0..31] are read and the result is stored in a[0..15].
|
|
|
-; * m The single precision number representing the modulus. Passed in rdx; words m[0..15] are read.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n. Passed in r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mont_reduce_avx2_16 PROC
|
|
|
- push r12 ; save every register this routine overwrites; they are popped in reverse order before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov r9, rcx ; r9 = a
|
|
|
- mov r10, rdx ; r10 = m (rdx itself is needed as the implicit MULX source operand)
|
|
|
- xor rbp, rbp ; rbp = carry word handed from one step to the next, initially 0
|
|
|
- ; i = 16 (r11 counts the steps still to do)
|
|
|
- mov r11, 16 ; the loop body below performs two steps per pass and subtracts 2
|
|
|
- mov r14, QWORD PTR [r9] ; r14, r15, rdi, rsi = a[0..3]: the four lowest live words are kept in registers
|
|
|
- mov r15, QWORD PTR [r9+8]
|
|
|
- mov rdi, QWORD PTR [r9+16]
|
|
|
- mov rsi, QWORD PTR [r9+24]
|
|
|
- add r9, 64 ; r9 = &a[8]; the loop addresses its window with displacements -32..+72 from here
|
|
|
- xor rbp, rbp ; rbp is already 0 here (cleared above and not written since)
|
|
|
-L_2048_mont_reduce_avx2_16_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14 ; rdx = a[i], the lowest live word
|
|
|
- mov r12, r14 ; r12 = second copy of a[i], consumed by the first addition below
|
|
|
- imul rdx, r8 ; rdx = low 64 bits of a[i] * mp = mu, the implicit source of every MULX in this step
|
|
|
- xor rbx, rbx ; rbx = 0; also clears CF and OF so both carry chains start from zero
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10] ; rcx:rax = m[0] * mu (MULX leaves the flags untouched)
|
|
|
- mov r14, r15 ; slide the register window down one word: r14 = a[i+1]
|
|
|
- adcx r12, rax ; CF chain adds the low product half; this sum is never stored, only its carry-out is used
|
|
|
- adox r14, rcx ; OF chain adds the high product half into the next word
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi ; r15 = a[i+2]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi ; rdi = a[i+3]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-32] ; rsi = a[i+4], the first word of this step that comes from memory
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-24] ; r13 = a[i+5]; from here r12 and r13 alternate as the in-flight word
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-24], r13 ; a[i+5] has now received both its high-half and low-half additions; write it back
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+-8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-16], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-8], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+48], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9+64] ; r12 = a[i+16], which receives the top half of the last product
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+56], r13
|
|
|
- adcx r12, rbp ; add the carry word left by the previous step, plus the pending CF
|
|
|
- mov rbp, rbx ; rbp = 0 (mov does not disturb CF or OF)
|
|
|
- mov QWORD PTR [r9+64], r12
|
|
|
- adox rbp, rbx ; rbp = carry-out of the OF chain
|
|
|
- adcx rbp, rbx ; rbp += carry-out of the CF chain: this is the carry word for the next step
|
|
|
- ; mu = a[i] * mp (second step of this pass: same sequence, with every displacement into a 8 bytes higher)
|
|
|
- mov rdx, r14 ; r14 now holds the next lowest live word
|
|
|
- mov r12, r14
|
|
|
- imul rdx, r8
|
|
|
- xor rbx, rbx ; rbx = 0 and CF = OF = 0 again for the new pair of carry chains
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10]
|
|
|
- mov r14, r15
|
|
|
- adcx r12, rax
|
|
|
- adox r14, rcx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-24]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-16]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-8]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-16], r13
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-8], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+8]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+16]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+8], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+24]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+16], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+24], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9+40]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+32], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+40], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+56]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+48], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+56], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9+72]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+64], r13
|
|
|
- adcx r12, rbp ; add the carry word from the first step of this pass, plus the pending CF
|
|
|
- mov rbp, rbx
|
|
|
- mov QWORD PTR [r9+72], r12
|
|
|
- adox rbp, rbx
|
|
|
- adcx rbp, rbx ; rbp = carry word for the next pass, or the final carry after the last pass
|
|
|
- ; a += 2 (two 8-byte words were consumed by this pass)
|
|
|
- add r9, 16
|
|
|
- ; i -= 2
|
|
|
- sub r11, 2
|
|
|
- jnz L_2048_mont_reduce_avx2_16_loop
|
|
|
- sub r9, 64 ; the 8 passes advanced r9 from &a[8] to &a[24]; r9 = &a[16]
|
|
|
- neg rbp ; rbp = -carry: 0 when the final carry is 0, all ones when it is 1
|
|
|
- mov r8, r9 ; r8 = &a[16], source of the upper words (mp is not used past this point)
|
|
|
- sub r9, 128 ; r9 = &a[0], destination of the result
|
|
|
- mov rcx, QWORD PTR [r10]
|
|
|
- mov rdx, r14 ; a[16] is still held in r14 (a[17..19] likewise in r15, rdi, rsi)
|
|
|
- pext rcx, rcx, rbp ; rcx = m[0] under an all-ones mask, 0 under a zero mask: branch-free select
|
|
|
- sub rdx, rcx ; starts the borrow chain; the MOV and PEXT instructions interleaved below do not modify CF
|
|
|
- mov rcx, QWORD PTR [r10+8]
|
|
|
- mov rax, r15
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9], rdx ; a[0] = a[16] - (m[0] or 0)
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+16]
|
|
|
- mov rcx, rdi
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+8], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov rdx, rsi
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+16], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [r8+32] ; a[20]: from here on the upper words are read from memory
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+24], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+40]
|
|
|
- mov rcx, QWORD PTR [r8+40]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+32], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- mov rdx, QWORD PTR [r8+48]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+40], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+56]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+48], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+64]
|
|
|
- mov rcx, QWORD PTR [r8+64]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+56], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov rdx, QWORD PTR [r8+72]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+64], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+72], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+88]
|
|
|
- mov rcx, QWORD PTR [r8+88]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+80], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- mov rdx, QWORD PTR [r8+96]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+88], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+104]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+96], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+112]
|
|
|
- mov rcx, QWORD PTR [r8+112]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+104], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov rdx, QWORD PTR [r8+120]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+112], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov QWORD PTR [r9+120], rdx ; a[15]: last word of the result
|
|
|
- pop rbp ; restore the saved registers in the reverse of the push order
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_mont_reduce_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
IFNDEF WC_NO_CACHE_RESISTANT
; /* Constant-time copy of one 128-byte entry out of a table of 32 entries.
;  *
;  * All 32 entries are always read. Each one is ANDed with a mask that is
;  * all ones only for the requested index and then ORed into the result, so
;  * the sequence of memory accesses does not depend on the index.
;  * When the index matches none of 0..31 the result is all zero.
;  *
;  * Register arguments (Microsoft x64 convention, as used by this file):
;  *   rcx  Destination buffer; 128 bytes are written.
;  *   rdx  Array of 32 pointers, each to a 128-byte entry.
;  *   r8   Index of the entry to copy; only the low 32 bits are examined.
;  *
;  * Clobbers: rax, r9, flags, xmm0-xmm5 and the upper halves of all YMM
;  *           registers (vzeroupper). xmm6, xmm7 and xmm10-xmm13 are
;  *           preserved. Stack use: 96 bytes.
;  */
_text SEGMENT READONLY PARA
sp_2048_get_from_table_avx2_16 PROC
        ; Spill only the non-volatile XMM registers that are modified below.
        sub     rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm10
        vmovdqu OWORD PTR [rsp+48], xmm11
        vmovdqu OWORD PTR [rsp+64], xmm12
        vmovdqu OWORD PTR [rsp+80], xmm13
        ; ymm10 = requested index in every dword lane
        ; ymm11 = 1 in every dword lane
        ; VEX-encoded moves keep the routine free of legacy-SSE instructions.
        mov     eax, 1
        vmovd   xmm10, r8d
        vmovd   xmm11, eax
        vpbroadcastd ymm10, xmm10
        vpbroadcastd ymm11, xmm11
        ; START: 0-15
        ; ymm13 = index of the entry currently being examined (starts at 0)
        ; ymm4-ymm7 = accumulated result (digits 0-15)
        vpxor   ymm13, ymm13, ymm13
        vpxor   ymm4, ymm4, ymm4
        vpxor   ymm5, ymm5, ymm5
        vpxor   ymm6, ymm6, ymm6
        vpxor   ymm7, ymm7, ymm7
sp_2048_gft16_entry = 0
REPT 32
        ; ENTRY: sp_2048_gft16_entry
        mov     r9, QWORD PTR [rdx+sp_2048_gft16_entry*8]
        ; ymm12 = all ones when this is the requested entry, otherwise zero
        vpcmpeqd ymm12, ymm13, ymm10
        vpand   ymm0, ymm12, YMMWORD PTR [r9]
        vpand   ymm1, ymm12, YMMWORD PTR [r9+32]
        vpand   ymm2, ymm12, YMMWORD PTR [r9+64]
        vpand   ymm3, ymm12, YMMWORD PTR [r9+96]
        vpor    ymm4, ymm4, ymm0
        vpor    ymm5, ymm5, ymm1
        vpor    ymm6, ymm6, ymm2
        vpor    ymm7, ymm7, ymm3
IF sp_2048_gft16_entry LT 31
        ; Advance the entry counter; not needed after the last entry.
        vpaddd  ymm13, ymm13, ymm11
ENDIF
sp_2048_gft16_entry = sp_2048_gft16_entry + 1
ENDM
        vmovdqu YMMWORD PTR [rcx], ymm4
        vmovdqu YMMWORD PTR [rcx+32], ymm5
        vmovdqu YMMWORD PTR [rcx+64], ymm6
        vmovdqu YMMWORD PTR [rcx+96], ymm7
        ; END: 0-15
        ; Clear the upper YMM halves so callers running legacy SSE code do not
        ; pay an AVX/SSE transition penalty.
        vzeroupper
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm10, OWORD PTR [rsp+32]
        vmovdqu xmm11, OWORD PTR [rsp+48]
        vmovdqu xmm12, OWORD PTR [rsp+64]
        vmovdqu xmm13, OWORD PTR [rsp+80]
        add     rsp, 96
        ret
sp_2048_get_from_table_avx2_16 ENDP
_text ENDS
ENDIF
|
|
|
; /* Masked subtraction of two 32-digit numbers: r = a - (b AND m).
;  *
;  * m is -1 (all ones) to subtract b, or 0 to subtract nothing, in which
;  * case a is copied to r unchanged. The same instructions execute for
;  * either mask value.
;  *
;  * r (rcx)  A single precision number receiving the result.
;  * a (rdx)  A single precision number to subtract from.
;  * b (r8)   A single precision number to subtract.
;  * m (r9)   Mask value to apply to every digit of b.
;  *
;  * Returns in rax: 0 when the subtraction did not borrow, -1 when it did.
;  * Uses 256 bytes of stack for the masked copy of b.
;  */
_text SEGMENT READONLY PARA
sp_2048_cond_sub_32 PROC
        sub     rsp, 256
        ; Pass 1: stack[i] = b[i] AND m for all 32 digits. The masking is done
        ; up front because AND clears the carry flag and so cannot be
        ; interleaved with the borrow chain of pass 2.
sp_2048_cs32_off = 0
REPT 32
        mov     r10, QWORD PTR [r8+sp_2048_cs32_off]
        and     r10, r9
        mov     QWORD PTR [rsp+sp_2048_cs32_off], r10
sp_2048_cs32_off = sp_2048_cs32_off + 8
ENDM
        ; Pass 2: r = a - stack, propagating the borrow through CF. Each
        ; result digit is stored only after the next digit of a has been
        ; loaded; MOV does not disturb CF.
        mov     r10, QWORD PTR [rdx]
        sub     r10, QWORD PTR [rsp]
sp_2048_cs32_off = 8
REPT 15
        mov     r11, QWORD PTR [rdx+sp_2048_cs32_off]
        sbb     r11, QWORD PTR [rsp+sp_2048_cs32_off]
        mov     QWORD PTR [rcx+sp_2048_cs32_off-8], r10
        mov     r10, QWORD PTR [rdx+sp_2048_cs32_off+8]
        sbb     r10, QWORD PTR [rsp+sp_2048_cs32_off+8]
        mov     QWORD PTR [rcx+sp_2048_cs32_off], r11
sp_2048_cs32_off = sp_2048_cs32_off + 16
ENDM
        mov     r11, QWORD PTR [rdx+248]
        sbb     r11, QWORD PTR [rsp+248]
        mov     QWORD PTR [rcx+240], r10
        mov     QWORD PTR [rcx+248], r11
        ; rax = 0 - CF: all ones when the final digit borrowed, else zero.
        sbb     rax, rax
        add     rsp, 256
        ret
sp_2048_cond_sub_32 ENDP
_text ENDS
|
|
|
-; /* Reduce the number back to 2048 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mont_reduce_32 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov r9, rdx
|
|
|
- xor rsi, rsi
|
|
|
- ; i = 32
|
|
|
- mov r10, 32
|
|
|
- mov r15, QWORD PTR [rcx]
|
|
|
- mov rdi, QWORD PTR [rcx+8]
|
|
|
-L_2048_mont_reduce_32_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov r13, r15
|
|
|
- imul r13, r8
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- add r15, rax
|
|
|
- adc r12, rdx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- mov r15, rdi
|
|
|
- add r15, rax
|
|
|
- adc r11, rdx
|
|
|
- add r15, r12
|
|
|
- adc r11, 0
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- mov rdi, QWORD PTR [rcx+16]
|
|
|
- add rdi, rax
|
|
|
- adc r12, rdx
|
|
|
- add rdi, r11
|
|
|
- adc r12, 0
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- mov r14, QWORD PTR [rcx+24]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+24], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- mov r14, QWORD PTR [rcx+32]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+32], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- mov r14, QWORD PTR [rcx+40]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+40], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- mov r14, QWORD PTR [rcx+48]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- mov r14, QWORD PTR [rcx+56]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- mov r14, QWORD PTR [rcx+64]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+64], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- mov r14, QWORD PTR [rcx+72]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+72], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- mov r14, QWORD PTR [rcx+80]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+80], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- mov r14, QWORD PTR [rcx+88]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+88], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- mov r14, QWORD PTR [rcx+96]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+96], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- mov r14, QWORD PTR [rcx+104]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+104], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- mov r14, QWORD PTR [rcx+112]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+112], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- mov r14, QWORD PTR [rcx+120]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+120], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+16] += m[16] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+128]
|
|
|
- mov r14, QWORD PTR [rcx+128]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+128], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+17] += m[17] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+136]
|
|
|
- mov r14, QWORD PTR [rcx+136]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+136], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+18] += m[18] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+144]
|
|
|
- mov r14, QWORD PTR [rcx+144]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+144], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+19] += m[19] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+152]
|
|
|
- mov r14, QWORD PTR [rcx+152]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+152], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+20] += m[20] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+160]
|
|
|
- mov r14, QWORD PTR [rcx+160]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+160], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+21] += m[21] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+168]
|
|
|
- mov r14, QWORD PTR [rcx+168]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+168], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+22] += m[22] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+176]
|
|
|
- mov r14, QWORD PTR [rcx+176]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+176], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+23] += m[23] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+184]
|
|
|
- mov r14, QWORD PTR [rcx+184]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+184], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+24] += m[24] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+192]
|
|
|
- mov r14, QWORD PTR [rcx+192]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+192], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+25] += m[25] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+200]
|
|
|
- mov r14, QWORD PTR [rcx+200]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+200], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+26] += m[26] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+208]
|
|
|
- mov r14, QWORD PTR [rcx+208]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+208], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+27] += m[27] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+216]
|
|
|
- mov r14, QWORD PTR [rcx+216]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+216], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+28] += m[28] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+224]
|
|
|
- mov r14, QWORD PTR [rcx+224]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+224], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+29] += m[29] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+232]
|
|
|
- mov r14, QWORD PTR [rcx+232]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+232], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+30] += m[30] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+240]
|
|
|
- mov r14, QWORD PTR [rcx+240]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+240], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+31] += m[31] * mu
|
|
|
- mov rax, r13
|
|
|
- mul QWORD PTR [r9+248]
|
|
|
- mov r14, QWORD PTR [rcx+248]
|
|
|
- add r12, rax
|
|
|
- adc rdx, rsi
|
|
|
- mov rsi, 0
|
|
|
- adc rsi, 0
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+248], r14
|
|
|
- adc QWORD PTR [rcx+256], rdx
|
|
|
- adc rsi, 0
|
|
|
- ; i -= 1
|
|
|
- add rcx, 8
|
|
|
- dec r10
|
|
|
- jnz L_2048_mont_reduce_32_loop
|
|
|
- mov QWORD PTR [rcx], r15
|
|
|
- mov QWORD PTR [rcx+8], rdi
|
|
|
- neg rsi
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, r9
|
|
|
- mov r9, rsi
|
|
|
-ELSE
|
|
|
- mov r9, rsi
|
|
|
- mov r8, r9
|
|
|
-ENDIF
|
|
|
- mov rdx, rcx
|
|
|
- mov rcx, rcx
|
|
|
- sub rcx, 256
|
|
|
- call sp_2048_cond_sub_32
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_mont_reduce_32 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into r. (r = a - b)
|
|
|
-; * Works on 32 64-bit words (byte offsets 0..248); returns the final borrow in rax: 0 if none, -1 if the subtraction borrowed.
|
|
|
-; * r A single precision integer receiving the difference (pointer in rcx).
|
|
|
-; * a A single precision integer to subtract from (pointer in rdx).
|
|
|
-; * b A single precision integer to subtract (pointer in r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_sub_32 PROC
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- sub r9, QWORD PTR [r8] ; word 0: plain sub starts the borrow chain (sets CF)
|
|
|
- mov r10, QWORD PTR [rdx+8] ; loads and stores are interleaved with the sbb chain; mov does not change CF
|
|
|
- mov QWORD PTR [rcx], r9
|
|
|
- sbb r10, QWORD PTR [r8+8] ; word 1: subtract with the borrow from word 0; every later word repeats this
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- sbb r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- sbb r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- sbb r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- sbb r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- sbb r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- sbb r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- sbb r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- sbb r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- sbb r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- sbb r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- sbb r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- sbb r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- sbb r10, QWORD PTR [r8+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- sbb r9, QWORD PTR [r8+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [rcx+128], r9
|
|
|
- sbb r10, QWORD PTR [r8+136]
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [rcx+136], r10
|
|
|
- sbb r9, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- sbb r10, QWORD PTR [r8+152]
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- sbb r9, QWORD PTR [r8+160]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [rcx+160], r9
|
|
|
- sbb r10, QWORD PTR [r8+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- sbb r9, QWORD PTR [r8+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [rcx+176], r9
|
|
|
- sbb r10, QWORD PTR [r8+184]
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [rcx+184], r10
|
|
|
- sbb r9, QWORD PTR [r8+192]
|
|
|
- mov r10, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [rcx+192], r9
|
|
|
- sbb r10, QWORD PTR [r8+200]
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [rcx+200], r10
|
|
|
- sbb r9, QWORD PTR [r8+208]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [rcx+208], r9
|
|
|
- sbb r10, QWORD PTR [r8+216]
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- sbb r9, QWORD PTR [r8+224]
|
|
|
- mov r10, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [rcx+224], r9
|
|
|
- sbb r10, QWORD PTR [r8+232]
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [rcx+232], r10
|
|
|
- sbb r9, QWORD PTR [r8+240]
|
|
|
- mov r10, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [rcx+240], r9
|
|
|
- sbb r10, QWORD PTR [r8+248]
|
|
|
- mov QWORD PTR [rcx+248], r10
|
|
|
- sbb rax, rax ; rax = -CF: 0 when there was no borrow, -1 when the last word borrowed
|
|
|
- ret
|
|
|
-sp_2048_sub_32 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; * Reads 32 64-bit words of a and writes 33 words of r (byte offsets 0..256), using mulx with adcx/adox.
|
|
|
-; * r A single precision integer receiving the product (pointer in rcx).
|
|
|
-; * a A single precision integer (pointer in rdx on entry, copied to rax).
|
|
|
-; * b A single precision digit (value in r8, moved into rdx for mulx).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mul_d_avx2_32 PROC
|
|
|
- push r12 ; r12 and r13 are used as scratch below and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, rdx ; rax = a; frees rdx, which mulx uses as its implicit multiplicand
|
|
|
- ; A[0] * B
|
|
|
- mov rdx, r8 ; rdx = b for every mulx below
|
|
|
- xor r13, r13 ; r13 = 0 for the whole routine; also clears CF and OF before the adcx/adox chains
|
|
|
- mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b (high:low); mulx leaves the flags unchanged
|
|
|
- mov QWORD PTR [rcx], r11 ; r[0] = low half of the first product
|
|
|
- ; A[1] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+8] ; r10:r9 = a[1] * b (high:low)
|
|
|
- mov r11, r13 ; r11 = 0; it will hold the word carried into the next position
|
|
|
- adcx r12, r9 ; previous high half + this low half, carry chained through CF only
|
|
|
- adox r11, r10 ; r11 = this high half (plus OF), chained through OF only
|
|
|
- mov QWORD PTR [rcx+8], r12 ; r[1]; r11 and r12 swap roles on every following word
|
|
|
- ; A[2] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- ; A[3] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- ; A[4] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- ; A[5] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- ; A[6] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+48]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- ; A[7] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+56]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- ; A[8] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+64]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- ; A[9] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+72]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+72], r12
|
|
|
- ; A[10] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+80]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- ; A[11] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+88]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- ; A[12] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+96]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+96], r11
|
|
|
- ; A[13] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+104]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+104], r12
|
|
|
- ; A[14] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+112]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- ; A[15] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+120]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- ; A[16] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+128]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- ; A[17] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+136]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- ; A[18] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+144]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+144], r11
|
|
|
- ; A[19] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+152]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+152], r12
|
|
|
- ; A[20] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+160]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+160], r11
|
|
|
- ; A[21] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+168]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+168], r12
|
|
|
- ; A[22] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+176]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- ; A[23] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+184]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- ; A[24] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+192]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+192], r11
|
|
|
- ; A[25] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+200]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+200], r12
|
|
|
- ; A[26] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+208]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+208], r11
|
|
|
- ; A[27] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+216]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+216], r12
|
|
|
- ; A[28] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+224]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- ; A[29] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+232]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- ; A[30] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+240]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+240], r11
|
|
|
- ; A[31] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+248]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- adcx r11, r13 ; fold the last CF into the top word (r13 is still 0)
|
|
|
- mov QWORD PTR [rcx+248], r12
|
|
|
- mov QWORD PTR [rcx+256], r11 ; r[32] = final carry word, so r must have room for 33 words
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_mul_d_avx2_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF _WIN64
|
|
|
-; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
|
|
|
-; * Uses a single unsigned div instruction; the remainder is left in rdx and not otherwise used here.
|
|
|
-; * d1 The high order half of the number to divide (in rcx).
|
|
|
-; * d0 The low order half of the number to divide (in rdx).
|
|
|
-; * div The divisor (in r8).
|
|
|
-; * returns the quotient of the division in rax.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-div_2048_word_asm_32 PROC
|
|
|
- mov r9, rdx ; save d0: rdx is about to be overwritten with d1
|
|
|
- mov rax, r9 ; rax = d0, low half of the 128-bit dividend
|
|
|
- mov rdx, rcx ; rdx = d1, high half of the 128-bit dividend
|
|
|
- div r8 ; rax = (rdx:rax) / div, rdx = remainder; NOTE(review): div faults (#DE) if div is 0 or the quotient needs more than 64 bits, so callers presumably guarantee d1 < div - confirm
|
|
|
- ret
|
|
|
-div_2048_word_asm_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting.
|
|
|
-; * Works on 32 64-bit words without branching; returns the final borrow in rax as 0 or -1.
|
|
|
-; * r A single precision number representing condition subtract result (pointer in rcx).
|
|
|
-; * a A single precision number to subtract from (pointer in rdx).
|
|
|
-; * b A single precision number to subtract (pointer in r8).
|
|
|
-; * m Mask value to apply (in r9); it is applied with pext, so only 0 and -1 behave as a plain select.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_cond_sub_avx2_32 PROC
|
|
|
- push r12 ; r12 is used as a third scratch register and restored before ret
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- pext r12, r12, r9 ; r12 = b[0] when m = -1, 0 when m = 0; pext leaves the flags unchanged
|
|
|
- sub r10, r12 ; word 0: plain sub starts the borrow chain
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx], r10 ; store of the previous word is delayed; mov does not disturb CF
|
|
|
- sbb r11, r12 ; word 1: subtract the masked b word and the borrow; r10/r11/r12 rotate roles below
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+56]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+104]
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+128]
|
|
|
- mov r11, QWORD PTR [rdx+128]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+136]
|
|
|
- mov r12, QWORD PTR [rdx+136]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+144]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+152]
|
|
|
- mov r11, QWORD PTR [rdx+152]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov r12, QWORD PTR [rdx+160]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+168]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+176]
|
|
|
- mov r11, QWORD PTR [rdx+176]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+184]
|
|
|
- mov r12, QWORD PTR [rdx+184]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+192]
|
|
|
- mov r10, QWORD PTR [rdx+192]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+200]
|
|
|
- mov r11, QWORD PTR [rdx+200]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+208]
|
|
|
- mov r12, QWORD PTR [rdx+208]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+216]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+224]
|
|
|
- mov r11, QWORD PTR [rdx+224]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+232]
|
|
|
- mov r12, QWORD PTR [rdx+232]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+240]
|
|
|
- mov r10, QWORD PTR [rdx+240]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+248]
|
|
|
- mov r11, QWORD PTR [rdx+248]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- sbb r11, r12
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- sbb rax, rax ; rax = -CF: 0 when there was no borrow, -1 when the last word borrowed
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_cond_sub_avx2_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Compare a with b in constant time.
|
|
|
-; * Visits all 32 64-bit words from the most significant (offset 248) down to offset 0, using cmov instead of branches.
|
|
|
-; * a A single precision integer (pointer in rcx).
|
|
|
-; * b A single precision integer (pointer in rdx).
|
|
|
-; * return -ve, 0 or +ve if a is less than, equal to or greater than b
|
|
|
-; * respectively (the code below produces exactly -1, 0 or 1 in rax).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_cmp_32 PROC
|
|
|
- push r12 ; r12 holds the b word in each step; restored before ret
|
|
|
- xor r9, r9 ; r9 = 0, the value used to clear the mask
|
|
|
- mov r8, -1 ; r8 = "still equal" mask: all ones until the first differing word is seen
|
|
|
- mov rax, -1 ; result accumulator; turned into 0 at the end if every word matched
|
|
|
- mov r10, 1 ; constant result for "a is greater"
|
|
|
- mov r11, QWORD PTR [rcx+248]
|
|
|
- mov r12, QWORD PTR [rdx+248]
|
|
|
- and r11, r8 ; once r8 is 0 both words become 0, so later words cannot change the result
|
|
|
- and r12, r8
|
|
|
- sub r11, r12 ; sets the flags read by the three cmovs below (cmov does not modify flags)
|
|
|
- cmova rax, r10 ; a word > b word (unsigned): result = 1
|
|
|
- cmovc rax, r8 ; a word < b word: result = r8, which is still -1 whenever this fires
|
|
|
- cmovnz r8, r9 ; words differ: clear the mask so the decision is final
|
|
|
- mov r11, QWORD PTR [rcx+240]
|
|
|
- mov r12, QWORD PTR [rdx+240]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+232]
|
|
|
- mov r12, QWORD PTR [rdx+232]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+224]
|
|
|
- mov r12, QWORD PTR [rdx+224]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+216]
|
|
|
- mov r12, QWORD PTR [rdx+216]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+208]
|
|
|
- mov r12, QWORD PTR [rdx+208]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+200]
|
|
|
- mov r12, QWORD PTR [rdx+200]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+192]
|
|
|
- mov r12, QWORD PTR [rdx+192]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+184]
|
|
|
- mov r12, QWORD PTR [rdx+184]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+176]
|
|
|
- mov r12, QWORD PTR [rdx+176]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+168]
|
|
|
- mov r12, QWORD PTR [rdx+168]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+160]
|
|
|
- mov r12, QWORD PTR [rdx+160]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+152]
|
|
|
- mov r12, QWORD PTR [rdx+152]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+144]
|
|
|
- mov r12, QWORD PTR [rdx+144]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+136]
|
|
|
- mov r12, QWORD PTR [rdx+136]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+128]
|
|
|
- mov r12, QWORD PTR [rdx+128]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+120]
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+112]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+104]
|
|
|
- mov r12, QWORD PTR [rdx+104]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+96]
|
|
|
- mov r12, QWORD PTR [rdx+96]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+88]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+80]
|
|
|
- mov r12, QWORD PTR [rdx+80]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+72]
|
|
|
- mov r12, QWORD PTR [rdx+72]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+64]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+48]
|
|
|
- mov r12, QWORD PTR [rdx+48]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+32]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+8]
|
|
|
- mov r12, QWORD PTR [rdx+8]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx]
|
|
|
- mov r12, QWORD PTR [rdx]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- xor rax, r8 ; all words equal: r8 = -1 and rax = -1, giving 0; otherwise r8 = 0 and rax is unchanged
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_cmp_32 ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_get_from_table_32 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- pxor xmm13, xmm13
|
|
|
- pshufd xmm11, xmm11, 0
|
|
|
- pshufd xmm10, xmm10, 0
|
|
|
- ; START: 0-7
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 32
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 33
|
|
|
- mov r9, QWORD PTR [rdx+264]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 34
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 35
|
|
|
- mov r9, QWORD PTR [rdx+280]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 36
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 37
|
|
|
- mov r9, QWORD PTR [rdx+296]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 38
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 39
|
|
|
- mov r9, QWORD PTR [rdx+312]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 40
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 41
|
|
|
- mov r9, QWORD PTR [rdx+328]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 42
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 43
|
|
|
- mov r9, QWORD PTR [rdx+344]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 44
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 45
|
|
|
- mov r9, QWORD PTR [rdx+360]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 46
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 47
|
|
|
- mov r9, QWORD PTR [rdx+376]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 48
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 49
|
|
|
- mov r9, QWORD PTR [rdx+392]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 50
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 51
|
|
|
- mov r9, QWORD PTR [rdx+408]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 52
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 53
|
|
|
- mov r9, QWORD PTR [rdx+424]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 54
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 55
|
|
|
- mov r9, QWORD PTR [rdx+440]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 56
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 57
|
|
|
- mov r9, QWORD PTR [rdx+456]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 58
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 59
|
|
|
- mov r9, QWORD PTR [rdx+472]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 60
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 61
|
|
|
- mov r9, QWORD PTR [rdx+488]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 62
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 63
|
|
|
- mov r9, QWORD PTR [rdx+504]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 0-7
|
|
|
- ; START: 8-15
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 32
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 33
|
|
|
- mov r9, QWORD PTR [rdx+264]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 34
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 35
|
|
|
- mov r9, QWORD PTR [rdx+280]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 36
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 37
|
|
|
- mov r9, QWORD PTR [rdx+296]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 38
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 39
|
|
|
- mov r9, QWORD PTR [rdx+312]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 40
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 41
|
|
|
- mov r9, QWORD PTR [rdx+328]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 42
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 43
|
|
|
- mov r9, QWORD PTR [rdx+344]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 44
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 45
|
|
|
- mov r9, QWORD PTR [rdx+360]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 46
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 47
|
|
|
- mov r9, QWORD PTR [rdx+376]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 48
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 49
|
|
|
- mov r9, QWORD PTR [rdx+392]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 50
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 51
|
|
|
- mov r9, QWORD PTR [rdx+408]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 52
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 53
|
|
|
- mov r9, QWORD PTR [rdx+424]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 54
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 55
|
|
|
- mov r9, QWORD PTR [rdx+440]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 56
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 57
|
|
|
- mov r9, QWORD PTR [rdx+456]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 58
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 59
|
|
|
- mov r9, QWORD PTR [rdx+472]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 60
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 61
|
|
|
- mov r9, QWORD PTR [rdx+488]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 62
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 63
|
|
|
- mov r9, QWORD PTR [rdx+504]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 8-15
|
|
|
- ; START: 16-23
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 32
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 33
|
|
|
- mov r9, QWORD PTR [rdx+264]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 34
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 35
|
|
|
- mov r9, QWORD PTR [rdx+280]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 36
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 37
|
|
|
- mov r9, QWORD PTR [rdx+296]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 38
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 39
|
|
|
- mov r9, QWORD PTR [rdx+312]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 40
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 41
|
|
|
- mov r9, QWORD PTR [rdx+328]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 42
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 43
|
|
|
- mov r9, QWORD PTR [rdx+344]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 44
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 45
|
|
|
- mov r9, QWORD PTR [rdx+360]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 46
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 47
|
|
|
- mov r9, QWORD PTR [rdx+376]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 48
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 49
|
|
|
- mov r9, QWORD PTR [rdx+392]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 50
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 51
|
|
|
- mov r9, QWORD PTR [rdx+408]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 52
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 53
|
|
|
- mov r9, QWORD PTR [rdx+424]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 54
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 55
|
|
|
- mov r9, QWORD PTR [rdx+440]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 56
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 57
|
|
|
- mov r9, QWORD PTR [rdx+456]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 58
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 59
|
|
|
- mov r9, QWORD PTR [rdx+472]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 60
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 61
|
|
|
- mov r9, QWORD PTR [rdx+488]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 62
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 63
|
|
|
- mov r9, QWORD PTR [rdx+504]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 16-23
|
|
|
- ; START: 24-31
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 32
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 33
|
|
|
- mov r9, QWORD PTR [rdx+264]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 34
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 35
|
|
|
- mov r9, QWORD PTR [rdx+280]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 36
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 37
|
|
|
- mov r9, QWORD PTR [rdx+296]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 38
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 39
|
|
|
- mov r9, QWORD PTR [rdx+312]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 40
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 41
|
|
|
- mov r9, QWORD PTR [rdx+328]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 42
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 43
|
|
|
- mov r9, QWORD PTR [rdx+344]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 44
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 45
|
|
|
- mov r9, QWORD PTR [rdx+360]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 46
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 47
|
|
|
- mov r9, QWORD PTR [rdx+376]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 48
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 49
|
|
|
- mov r9, QWORD PTR [rdx+392]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 50
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 51
|
|
|
- mov r9, QWORD PTR [rdx+408]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 52
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 53
|
|
|
- mov r9, QWORD PTR [rdx+424]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 54
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 55
|
|
|
- mov r9, QWORD PTR [rdx+440]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 56
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 57
|
|
|
- mov r9, QWORD PTR [rdx+456]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 58
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 59
|
|
|
- mov r9, QWORD PTR [rdx+472]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 60
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 61
|
|
|
- mov r9, QWORD PTR [rdx+488]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 62
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 63
|
|
|
- mov r9, QWORD PTR [rdx+504]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- ; END: 24-31
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_2048_get_from_table_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 2048 bits using Montgomery reduction (MULX/ADCX/ADOX/PEXT variant).
|
|
|
-; * Runs 32 rows of "a[i..i+32] += mu * m" over 64-bit limbs, then stores a[32..63] minus (m if the final carry is 1, else 0) into a[0..31].
|
|
|
-; * a A single precision number to reduce in place; limbs a[0..63] are accessed. Arrives in rcx.
|
|
|
-; * m The single precision number representing the modulus; limbs m[0..31] are read. Arrives in rdx.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n. Arrives in r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_mont_reduce_avx2_32 PROC
|
|
|
- push r12  ; save the registers overwritten below (r12-r15, rdi, rsi, rbx, rbp); popped in reverse order before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov r9, rcx  ; r9 = a (rcx is reused as the MULX high-half output)
|
|
|
- mov r10, rdx  ; r10 = m (rdx becomes the implicit MULX multiplier mu)
|
|
|
- xor rbp, rbp  ; rbp = 0: carry handed from one row to the next
|
|
|
- ; i = 32 (r11 counts the remaining rows down to zero)
|
|
|
- mov r11, 32
|
|
|
- mov r14, QWORD PTR [r9]  ; r14 = a[0]; r14, r15, rdi, rsi form a 4-limb register window a[i..i+3]
|
|
|
- mov r15, QWORD PTR [r9+8]  ; r15 = a[1]
|
|
|
- mov rdi, QWORD PTR [r9+16]  ; rdi = a[2]
|
|
|
- mov rsi, QWORD PTR [r9+24]  ; rsi = a[3]
|
|
|
- add r9, 128  ; bias the pointer: a[i+k] is addressed as [r9 + 8*k - 128] inside the loop
|
|
|
- xor rbp, rbp  ; rbp = 0 again (repeats the xor above; nothing in between writes rbp)
|
|
|
-L_2048_mont_reduce_avx2_32_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14  ; rdx = a[i]
|
|
|
- mov r12, r14  ; r12 = a[i], the limb this row adds m[0] * mu into
|
|
|
- imul rdx, r8  ; rdx = low 64 bits of a[i] * mp = mu
|
|
|
- xor rbx, rbx  ; rbx = 0 and CF = OF = 0: both carry chains start clean
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10]  ; rcx:rax = m[0] * mu (high:low)
|
|
|
- mov r14, r15  ; slide the register window: r14 = a[i+1]
|
|
|
- adcx r12, rax  ; low half added through the CF chain
|
|
|
- adox r14, rcx  ; high half added to the next limb through the OF chain
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi  ; r15 = a[i+2]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi  ; rdi = a[i+3]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-96]  ; rsi = a[i+4], loaded from memory into the register window
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-88]  ; r13 = a[i+5]; from here r12 and r13 alternate as accumulators for limbs kept in memory
|
|
|
- adcx rsi, rax  ; a[i+4] stays in rsi and is not written back in this row
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-80]  ; r12 = a[i+6]; the a[i+0] sum previously in r12 is overwritten without being stored
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-88], r13  ; store updated a[i+5]
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+-72]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-80], r12  ; store updated a[i+6]
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+-64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-72], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+-56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-64], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+-48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-56], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+-40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-48], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9+-32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-40], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+-24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-32], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+-16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-24], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+-8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-16], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9]  ; r12 = a[i+16] (offset 0 because of the +128 pointer bias)
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-8], r13
|
|
|
- ; a[i+16] += m[16] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+128]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9], r12
|
|
|
- ; a[i+17] += m[17] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+136]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+18] += m[18] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+144]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+19] += m[19] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+152]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+20] += m[20] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+160]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- ; a[i+21] += m[21] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+168]
|
|
|
- mov r12, QWORD PTR [r9+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- ; a[i+22] += m[22] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+176]
|
|
|
- mov r13, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+48], r12
|
|
|
- ; a[i+23] += m[23] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+184]
|
|
|
- mov r12, QWORD PTR [r9+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+56], r13
|
|
|
- ; a[i+24] += m[24] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+192]
|
|
|
- mov r13, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+64], r12
|
|
|
- ; a[i+25] += m[25] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+200]
|
|
|
- mov r12, QWORD PTR [r9+80]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+72], r13
|
|
|
- ; a[i+26] += m[26] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+208]
|
|
|
- mov r13, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+80], r12
|
|
|
- ; a[i+27] += m[27] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+216]
|
|
|
- mov r12, QWORD PTR [r9+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+88], r13
|
|
|
- ; a[i+28] += m[28] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+224]
|
|
|
- mov r13, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+96], r12
|
|
|
- ; a[i+29] += m[29] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+232]
|
|
|
- mov r12, QWORD PTR [r9+112]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+104], r13
|
|
|
- ; a[i+30] += m[30] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+240]
|
|
|
- mov r13, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+112], r12
|
|
|
- ; a[i+31] += m[31] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+248]
|
|
|
- mov r12, QWORD PTR [r9+128]  ; r12 = a[i+32], the limb that receives this row's top carries
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+120], r13
|
|
|
- adcx r12, rbp  ; a[i+32] += carry saved from the previous row + CF
|
|
|
- mov rbp, rbx  ; rbp = 0 (rbx is still zero; mov leaves CF and OF intact)
|
|
|
- mov QWORD PTR [r9+128], r12  ; store updated a[i+32]
|
|
|
- adox rbp, rbx  ; rbp = OF
|
|
|
- adcx rbp, rbx  ; rbp += CF: carry handed to the next row (captured before add/sub below clobber the flags)
|
|
|
- ; a += 1 (advance the biased pointer by one 64-bit limb)
|
|
|
- add r9, 8
|
|
|
- ; i -= 1
|
|
|
- sub r11, 1
|
|
|
- jnz L_2048_mont_reduce_avx2_32_loop
|
|
|
- sub r9, 128  ; undo the +128 bias: after 32 rows r9 = &a[32]
|
|
|
- neg rbp  ; rbp = -carry: all ones when the last row's carry is 1, zero when it is 0
|
|
|
- mov r8, r9  ; r8 = &a[32], source of the upper half (mp in r8 is no longer needed)
|
|
|
- sub r9, 256  ; r9 = &a[0], destination of the result
|
|
|
- mov rcx, QWORD PTR [r10]  ; rcx = m[0]
|
|
|
- mov rdx, r14  ; rdx = a[32], still held in the register window
|
|
|
- pext rcx, rcx, rbp  ; rcx = m[0] when the mask is all ones, 0 when the mask is zero (branch-free select)
|
|
|
- sub rdx, rcx  ; starts the borrow chain; the mov/pext instructions in between do not modify flags
|
|
|
- mov rcx, QWORD PTR [r10+8]
|
|
|
- mov rax, r15  ; rax = a[33] from the register window
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9], rdx  ; a[0] = a[32] - masked m[0]
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+16]
|
|
|
- mov rcx, rdi  ; rcx = a[34] from the register window
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+8], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov rdx, rsi  ; rdx = a[35] from the register window
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+16], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [r8+32]  ; rax = a[36]: from here the upper limbs are read back from memory
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+24], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+40]
|
|
|
- mov rcx, QWORD PTR [r8+40]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+32], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- mov rdx, QWORD PTR [r8+48]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+40], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+56]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+48], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+64]
|
|
|
- mov rcx, QWORD PTR [r8+64]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+56], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov rdx, QWORD PTR [r8+72]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+64], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+72], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+88]
|
|
|
- mov rcx, QWORD PTR [r8+88]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+80], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- mov rdx, QWORD PTR [r8+96]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+88], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+104]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+96], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+112]
|
|
|
- mov rcx, QWORD PTR [r8+112]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+104], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov rdx, QWORD PTR [r8+120]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+112], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+128]
|
|
|
- mov rax, QWORD PTR [r8+128]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+120], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+136]
|
|
|
- mov rcx, QWORD PTR [r8+136]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+128], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+144]
|
|
|
- mov rdx, QWORD PTR [r8+144]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+136], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+152]
|
|
|
- mov rax, QWORD PTR [r8+152]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+144], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+160]
|
|
|
- mov rcx, QWORD PTR [r8+160]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+152], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov rdx, QWORD PTR [r8+168]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+160], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+176]
|
|
|
- mov rax, QWORD PTR [r8+176]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+168], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+184]
|
|
|
- mov rcx, QWORD PTR [r8+184]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+176], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+192]
|
|
|
- mov rdx, QWORD PTR [r8+192]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+184], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+200]
|
|
|
- mov rax, QWORD PTR [r8+200]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+192], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+208]
|
|
|
- mov rcx, QWORD PTR [r8+208]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+200], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+216]
|
|
|
- mov rdx, QWORD PTR [r8+216]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+208], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+224]
|
|
|
- mov rax, QWORD PTR [r8+224]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+216], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+232]
|
|
|
- mov rcx, QWORD PTR [r8+232]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+224], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+240]
|
|
|
- mov rdx, QWORD PTR [r8+240]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+232], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+248]
|
|
|
- mov rax, QWORD PTR [r8+248]  ; rax = a[63], the last upper limb
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+240], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov QWORD PTR [r9+248], rax  ; a[31] = a[63] - masked m[31] - borrow; the final borrow is not used
|
|
|
- pop rbp  ; restore saved registers in reverse push order
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_2048_mont_reduce_avx2_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_2048_get_from_table_avx2_32 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpermd ymm10, ymm13, ymm10
|
|
|
- vpermd ymm11, ymm13, ymm11
|
|
|
- ; START: 0-15
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 32
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 33
|
|
|
- mov r9, QWORD PTR [rdx+264]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 34
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 35
|
|
|
- mov r9, QWORD PTR [rdx+280]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 36
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 37
|
|
|
- mov r9, QWORD PTR [rdx+296]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 38
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 39
|
|
|
- mov r9, QWORD PTR [rdx+312]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 40
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 41
|
|
|
- mov r9, QWORD PTR [rdx+328]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 42
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 43
|
|
|
- mov r9, QWORD PTR [rdx+344]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 44
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 45
|
|
|
- mov r9, QWORD PTR [rdx+360]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 46
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 47
|
|
|
- mov r9, QWORD PTR [rdx+376]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 48
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 49
|
|
|
- mov r9, QWORD PTR [rdx+392]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 50
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 51
|
|
|
- mov r9, QWORD PTR [rdx+408]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 52
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 53
|
|
|
- mov r9, QWORD PTR [rdx+424]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 54
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 55
|
|
|
- mov r9, QWORD PTR [rdx+440]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 56
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 57
|
|
|
- mov r9, QWORD PTR [rdx+456]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 58
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 59
|
|
|
- mov r9, QWORD PTR [rdx+472]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 60
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 61
|
|
|
- mov r9, QWORD PTR [rdx+488]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 62
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 63
|
|
|
- mov r9, QWORD PTR [rdx+504]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- add rcx, 128
|
|
|
- ; END: 0-15
|
|
|
- ; START: 16-31
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 32
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 33
|
|
|
- mov r9, QWORD PTR [rdx+264]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 34
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 35
|
|
|
- mov r9, QWORD PTR [rdx+280]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 36
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 37
|
|
|
- mov r9, QWORD PTR [rdx+296]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 38
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 39
|
|
|
- mov r9, QWORD PTR [rdx+312]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 40
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 41
|
|
|
- mov r9, QWORD PTR [rdx+328]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 42
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 43
|
|
|
- mov r9, QWORD PTR [rdx+344]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 44
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 45
|
|
|
- mov r9, QWORD PTR [rdx+360]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 46
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 47
|
|
|
- mov r9, QWORD PTR [rdx+376]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 48
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 49
|
|
|
- mov r9, QWORD PTR [rdx+392]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 50
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 51
|
|
|
- mov r9, QWORD PTR [rdx+408]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 52
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 53
|
|
|
- mov r9, QWORD PTR [rdx+424]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 54
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 55
|
|
|
- mov r9, QWORD PTR [rdx+440]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 56
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 57
|
|
|
- mov r9, QWORD PTR [rdx+456]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 58
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 59
|
|
|
- mov r9, QWORD PTR [rdx+472]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 60
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 61
|
|
|
- mov r9, QWORD PTR [rdx+488]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 62
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 63
|
|
|
- mov r9, QWORD PTR [rdx+504]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- ; END: 16-31
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_2048_get_from_table_avx2_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
; /* Masked addition of two 16-word numbers: r = a + (b & m).
;  * m is -1 to add b and 0 to add nothing.
;  *
;  * Register arguments as used by this routine:
;  *   rcx  r  Single precision number receiving the result (16 words).
;  *   rdx  a  Single precision number to add with (16 words).
;  *   r8   b  Single precision number to add (16 words).
;  *   r9   m  Mask value ANDed with every word of b.
;  * Returns the carry out of the top word in rax.
;  */
_text SEGMENT READONLY PARA
sp_2048_cond_add_16 PROC
        sub     rsp, 128                        ; 16-word scratch buffer for (b & m)
        mov     rax, 0
        ; Pass 1: store the masked copy of b on the stack, two words per step.
sp_2048_cond_add_16_ofs = 0
        REPT    8
        mov     r10, QWORD PTR [r8+sp_2048_cond_add_16_ofs]
        mov     r11, QWORD PTR [r8+sp_2048_cond_add_16_ofs+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp_2048_cond_add_16_ofs], r10
        mov     QWORD PTR [rsp+sp_2048_cond_add_16_ofs+8], r11
sp_2048_cond_add_16_ofs = sp_2048_cond_add_16_ofs + 16
        ENDM
        ; Pass 2: add the masked words to a. r8 is free now and serves as scratch.
        ; Only mov sits between the add/adc instructions, so the carry flag
        ; travels intact from word 0 up to word 15.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        add     r10, r8
        ; Words 1..14: sums alternate between r11 and r10; each sum is stored
        ; one step after it was produced.
sp_2048_cond_add_16_ofs = 8
        REPT    7
        mov     r11, QWORD PTR [rdx+sp_2048_cond_add_16_ofs]
        mov     r8, QWORD PTR [rsp+sp_2048_cond_add_16_ofs]
        adc     r11, r8
        mov     QWORD PTR [rcx+sp_2048_cond_add_16_ofs-8], r10
        mov     r10, QWORD PTR [rdx+sp_2048_cond_add_16_ofs+8]
        mov     r8, QWORD PTR [rsp+sp_2048_cond_add_16_ofs+8]
        adc     r10, r8
        mov     QWORD PTR [rcx+sp_2048_cond_add_16_ofs], r11
sp_2048_cond_add_16_ofs = sp_2048_cond_add_16_ofs + 16
        ENDM
        ; Word 15 and the two pending stores.
        mov     r11, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [rsp+120]
        adc     r11, r8
        mov     QWORD PTR [rcx+112], r10
        mov     QWORD PTR [rcx+120], r11
        adc     rax, 0                          ; rax = final carry
        add     rsp, 128
        ret
sp_2048_cond_add_16 ENDP
_text ENDS
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Masked addition of two 16-word numbers: r = a + (b & m).
;  * m is -1 to add b and 0 to add nothing.
;  * The mask is applied with pext: an all-ones mask passes the word through
;  * unchanged and a zero mask yields 0.
;  *
;  * Register arguments as used by this routine:
;  *   rcx  r  Single precision number receiving the result (16 words).
;  *   rdx  a  Single precision number to add with (16 words).
;  *   r8   b  Single precision number to add (16 words).
;  *   r9   m  Mask value to apply.
;  * Returns the carry out of the top word in rax.
;  */
_text SEGMENT READONLY PARA
sp_2048_cond_add_avx2_16 PROC
        push    r12
        mov     rax, 0
        ; Word 0 starts the carry chain with a plain add.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        add     r10, r12
        ; Words 1..15: r10/r11/r12 rotate through the roles "masked b",
        ; "a / sum" and "sum waiting to be stored" with a period of three words.
        ; mov and pext leave the flags alone, so the carry survives between adc's.
sp_2048_cond_add_avx2_16_ofs = 8
        REPT    5
        ; sum -> r11, pending store of r10
        mov     r12, QWORD PTR [r8+sp_2048_cond_add_avx2_16_ofs]
        mov     r11, QWORD PTR [rdx+sp_2048_cond_add_avx2_16_ofs]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+sp_2048_cond_add_avx2_16_ofs-8], r10
        adc     r11, r12
        ; sum -> r12, pending store of r11
        mov     r10, QWORD PTR [r8+sp_2048_cond_add_avx2_16_ofs+8]
        mov     r12, QWORD PTR [rdx+sp_2048_cond_add_avx2_16_ofs+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+sp_2048_cond_add_avx2_16_ofs], r11
        adc     r12, r10
        ; sum -> r10, pending store of r12
        mov     r11, QWORD PTR [r8+sp_2048_cond_add_avx2_16_ofs+16]
        mov     r10, QWORD PTR [rdx+sp_2048_cond_add_avx2_16_ofs+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+sp_2048_cond_add_avx2_16_ofs+8], r12
        adc     r10, r11
sp_2048_cond_add_avx2_16_ofs = sp_2048_cond_add_avx2_16_ofs + 24
        ENDM
        mov     QWORD PTR [rcx+120], r10        ; last sum (word 15)
        adc     rax, 0                          ; rax = final carry
        pop     r12
        ret
sp_2048_cond_add_avx2_16 ENDP
_text ENDS
ENDIF
|
|
|
; /* Shift number left by n bits. (r = a << n)
;  *
;  * Register arguments as used by this routine:
;  *   rcx  r  Result of left shift by n. 33 words are written: r[0]..r[32],
;  *           where r[32] receives the bits shifted out of a[31].
;  *   rdx  a  Number to shift (32 words are read).
;  *   r8b  n  Amount to shift, in bits. shld/shl on 64-bit operands use only
;  *           the low 6 bits of the count.
;  *
;  * Words are processed from the most significant end downwards, and every
;  * word of a is loaded before the matching word of r is stored.
;  */
_text SEGMENT READONLY PARA
sp_2048_lshift_32 PROC
        push    r12
        push    r13
        ; Save r in rax BEFORE loading the shift count: cl is the low byte of
        ; rcx, so writing cl first would overwrite the low byte of the
        ; destination pointer and send every store below to the wrong address.
        mov     rax, rcx
        mov     cl, r8b
        mov     r12, 0
        ; Words 27..31 -> r[28..32]; r13 keeps a[27] for the next group.
        mov     r13, QWORD PTR [rdx+216]
        mov     r8, QWORD PTR [rdx+224]
        mov     r9, QWORD PTR [rdx+232]
        mov     r10, QWORD PTR [rdx+240]
        mov     r11, QWORD PTR [rdx+248]
        shld    r12, r11, cl
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+224], r8
        mov     QWORD PTR [rax+232], r9
        mov     QWORD PTR [rax+240], r10
        mov     QWORD PTR [rax+248], r11
        mov     QWORD PTR [rax+256], r12
        ; Words 23..26 -> r[24..27]; r11 keeps a[23] for the next group.
        mov     r11, QWORD PTR [rdx+184]
        mov     r8, QWORD PTR [rdx+192]
        mov     r9, QWORD PTR [rdx+200]
        mov     r10, QWORD PTR [rdx+208]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+192], r8
        mov     QWORD PTR [rax+200], r9
        mov     QWORD PTR [rax+208], r10
        mov     QWORD PTR [rax+216], r13
        ; Words 19..22 -> r[20..23]; r13 keeps a[19].
        mov     r13, QWORD PTR [rdx+152]
        mov     r8, QWORD PTR [rdx+160]
        mov     r9, QWORD PTR [rdx+168]
        mov     r10, QWORD PTR [rdx+176]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+160], r8
        mov     QWORD PTR [rax+168], r9
        mov     QWORD PTR [rax+176], r10
        mov     QWORD PTR [rax+184], r11
        ; Words 15..18 -> r[16..19]; r11 keeps a[15].
        mov     r11, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [rdx+128]
        mov     r9, QWORD PTR [rdx+136]
        mov     r10, QWORD PTR [rdx+144]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+128], r8
        mov     QWORD PTR [rax+136], r9
        mov     QWORD PTR [rax+144], r10
        mov     QWORD PTR [rax+152], r13
        ; Words 11..14 -> r[12..15]; r13 keeps a[11].
        mov     r13, QWORD PTR [rdx+88]
        mov     r8, QWORD PTR [rdx+96]
        mov     r9, QWORD PTR [rdx+104]
        mov     r10, QWORD PTR [rdx+112]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+96], r8
        mov     QWORD PTR [rax+104], r9
        mov     QWORD PTR [rax+112], r10
        mov     QWORD PTR [rax+120], r11
        ; Words 7..10 -> r[8..11]; r11 keeps a[7].
        mov     r11, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [rdx+64]
        mov     r9, QWORD PTR [rdx+72]
        mov     r10, QWORD PTR [rdx+80]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+64], r8
        mov     QWORD PTR [rax+72], r9
        mov     QWORD PTR [rax+80], r10
        mov     QWORD PTR [rax+88], r13
        ; Words 3..6 -> r[4..7]; r13 keeps a[3].
        mov     r13, QWORD PTR [rdx+24]
        mov     r8, QWORD PTR [rdx+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rdx+48]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+32], r8
        mov     QWORD PTR [rax+40], r9
        mov     QWORD PTR [rax+48], r10
        mov     QWORD PTR [rax+56], r11
        ; Words 0..2 -> r[0..3]; the lowest word is filled with zero bits.
        mov     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shl     r8, cl
        mov     QWORD PTR [rax], r8
        mov     QWORD PTR [rax+8], r9
        mov     QWORD PTR [rax+16], r10
        mov     QWORD PTR [rax+24], r13
        pop     r13
        pop     r12
        ret
sp_2048_lshift_32 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFNDEF WOLFSSL_SP_NO_3072
|
|
|
-IFNDEF WOLFSSL_SP_NO_3072
|
|
|
; /* Load a big endian unsigned byte array into the words of r.
;  * Byte order is reversed with the bswap instruction.
;  *
;  * Register arguments as used by this routine:
;  *   rcx  r     A single precision integer; words not covered by the input
;  *              are set to zero up to r + 384 bytes.
;  *   rdx  size  Maximum number of bytes to convert (not read by this code).
;  *   r8   a     Byte array.
;  *   r9   n     Number of bytes in array to read.
;  */
_text SEGMENT READONLY PARA
sp_3072_from_bin_bswap PROC
        push    r12
        push    r13
        lea     r11, [r8+r9]                    ; r11 = a + n (end of input)
        lea     r12, [rcx+384]                  ; r12 = end of the result words
        xor     r13, r13                        ; r13 = 0 for the rest of the routine
        jmp     L_3072_from_bin_bswap_64_end
L_3072_from_bin_bswap_64_start:
        ; Take 64 bytes from the tail of the input and emit 8 words.
        sub     r11, 64
sp_3072_from_bin_bswap_ofs = 0
        REPT    4
        mov     rax, QWORD PTR [r11+56-sp_3072_from_bin_bswap_ofs]
        mov     r10, QWORD PTR [r11+48-sp_3072_from_bin_bswap_ofs]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+sp_3072_from_bin_bswap_ofs], rax
        mov     QWORD PTR [rcx+sp_3072_from_bin_bswap_ofs+8], r10
sp_3072_from_bin_bswap_ofs = sp_3072_from_bin_bswap_ofs + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
L_3072_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_3072_from_bin_bswap_64_start
        jmp     L_3072_from_bin_bswap_8_end
L_3072_from_bin_bswap_8_start:
        ; Take 8 bytes from the tail of the input and emit one word.
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_3072_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_3072_from_bin_bswap_8_start
        cmp     r9, r13
        je      L_3072_from_bin_bswap_hi_end
        ; Fewer than 8 bytes remain at the front of the array: accumulate them
        ; one at a time, first byte most significant, into a single word.
        mov     r10, r13
        mov     rax, r13
L_3072_from_bin_bswap_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_bswap_hi_end:
        ; Zero every remaining word up to r12.
        cmp     rcx, r12
        jge     L_3072_from_bin_bswap_zero_end
L_3072_from_bin_bswap_zero_start:
        mov     QWORD PTR [rcx], r13
        add     rcx, 8
        cmp     rcx, r12
        jl      L_3072_from_bin_bswap_zero_start
L_3072_from_bin_bswap_zero_end:
        pop     r13
        pop     r12
        ret
sp_3072_from_bin_bswap ENDP
_text ENDS
|
|
|
IFNDEF NO_MOVBE_SUPPORT
; /* Load a big endian unsigned byte array into the words of r.
;  * Byte order is reversed by the movbe instruction, which is an optional
;  * instruction.
;  *
;  * Register arguments as used by this routine:
;  *   rcx  r     A single precision integer; words not covered by the input
;  *              are set to zero up to r + 384 bytes.
;  *   rdx  size  Maximum number of bytes to convert (not read by this code).
;  *   r8   a     Byte array.
;  *   r9   n     Number of bytes in array to read.
;  */
_text SEGMENT READONLY PARA
sp_3072_from_bin_movbe PROC
        push    r12
        lea     r11, [r8+r9]                    ; r11 = a + n (end of input)
        lea     r12, [rcx+384]                  ; r12 = end of the result words
        jmp     L_3072_from_bin_movbe_64_end
L_3072_from_bin_movbe_64_start:
        ; Take 64 bytes from the tail of the input and emit 8 words.
        sub     r11, 64
sp_3072_from_bin_movbe_ofs = 0
        REPT    4
        movbe   rax, QWORD PTR [r11+56-sp_3072_from_bin_movbe_ofs]
        movbe   r10, QWORD PTR [r11+48-sp_3072_from_bin_movbe_ofs]
        mov     QWORD PTR [rcx+sp_3072_from_bin_movbe_ofs], rax
        mov     QWORD PTR [rcx+sp_3072_from_bin_movbe_ofs+8], r10
sp_3072_from_bin_movbe_ofs = sp_3072_from_bin_movbe_ofs + 16
        ENDM
        add     rcx, 64
        sub     r9, 64
L_3072_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_3072_from_bin_movbe_64_start
        jmp     L_3072_from_bin_movbe_8_end
L_3072_from_bin_movbe_8_start:
        ; Take 8 bytes from the tail of the input and emit one word.
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_3072_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_3072_from_bin_movbe_8_start
        cmp     r9, 0
        je      L_3072_from_bin_movbe_hi_end
        ; Fewer than 8 bytes remain at the front of the array: accumulate them
        ; one at a time, first byte most significant, into a single word.
        mov     r10, 0
        mov     rax, 0
L_3072_from_bin_movbe_hi_start:
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_3072_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_3072_from_bin_movbe_hi_end:
        ; Zero every remaining word up to r12.
        cmp     rcx, r12
        jge     L_3072_from_bin_movbe_zero_end
L_3072_from_bin_movbe_zero_start:
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_3072_from_bin_movbe_zero_start
L_3072_from_bin_movbe_zero_end:
        pop     r12
        ret
sp_3072_from_bin_movbe ENDP
_text ENDS
ENDIF
|
|
|
; /* Store the 48 words of r into a byte array in big endian order.
;  * Fixed length number of bytes written: 384
;  * Byte order is reversed with the bswap instruction.
;  *
;  * Register arguments as used by this routine:
;  *   rcx  r  A single precision integer (48 words are read).
;  *   rdx  a  Byte array (384 bytes are written).
;  */
_text SEGMENT READONLY PARA
sp_3072_to_bin_bswap_48 PROC
        ; Walk r from its top word (offset 376) downwards while the output
        ; offset climbs from 0; two words are converted per step.
sp_3072_to_bin_bswap_48_ofs = 0
        REPT    24
        mov     rax, QWORD PTR [rcx+376-sp_3072_to_bin_bswap_48_ofs]
        mov     r8, QWORD PTR [rcx+368-sp_3072_to_bin_bswap_48_ofs]
        bswap   rax
        bswap   r8
        mov     QWORD PTR [rdx+sp_3072_to_bin_bswap_48_ofs], rax
        mov     QWORD PTR [rdx+sp_3072_to_bin_bswap_48_ofs+8], r8
sp_3072_to_bin_bswap_48_ofs = sp_3072_to_bin_bswap_48_ofs + 16
        ENDM
        ret
sp_3072_to_bin_bswap_48 ENDP
_text ENDS
|
|
|
IFNDEF NO_MOVBE_SUPPORT
; /* Store the 48 words of r into a byte array in big endian order.
;  * Fixed length number of bytes written: 384
;  * Byte order is reversed by the movbe instruction, which is optional.
;  *
;  * Register arguments as used by this routine:
;  *   rcx  r  A single precision integer (48 words are read).
;  *   rdx  a  Byte array (384 bytes are written).
;  */
_text SEGMENT READONLY PARA
sp_3072_to_bin_movbe_48 PROC
        ; Walk r from its top word (offset 376) downwards while the output
        ; offset climbs from 0; two words are converted per step.
sp_3072_to_bin_movbe_48_ofs = 0
        REPT    24
        movbe   rax, QWORD PTR [rcx+376-sp_3072_to_bin_movbe_48_ofs]
        movbe   r8, QWORD PTR [rcx+368-sp_3072_to_bin_movbe_48_ofs]
        mov     QWORD PTR [rdx+sp_3072_to_bin_movbe_48_ofs], rax
        mov     QWORD PTR [rdx+sp_3072_to_bin_movbe_48_ofs+8], r8
sp_3072_to_bin_movbe_48_ofs = sp_3072_to_bin_movbe_48_ofs + 16
        ENDM
        ret
sp_3072_to_bin_movbe_48 ENDP
_text ENDS
ENDIF
|
|
|
-; /* Multiply a and b into r. (r = a * b) Fully unrolled: each of the 24 result words is the sum of all A[i] * B[j] with i + j equal to the word index.
|
|
|
-; * Entry registers as used below: rcx = r, rdx = a (copied to r9 because mul overwrites rdx), r8 = b.
|
|
|
-; * r A single precision integer. Receives 24 words: words 0-11 are staged at [rsp] and copied to r at the end, words 12-23 are stored to r as they are produced. NOTE(review): presumably so r may alias a or b - confirm with callers.
|
|
|
-; * a A single precision integer. Read as 12 words, [r9] to [r9+88].
|
|
|
-; * b A single precision integer. Read as 12 words, [r8] to [r8+88].
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_12 PROC
|
|
|
- push r12 ; r12 is used as an accumulator below; saved here and restored before ret
|
|
|
- mov r9, rdx ; r9 = a, because every mul overwrites rdx with the high half of the product
|
|
|
- sub rsp, 96 ; 12-qword scratch area that stages result words 0-11
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12 ; r12 = 0: it is the middle accumulator word of the next column
|
|
|
- mov QWORD PTR [rsp], rax ; result word 0, staged on the stack
|
|
|
- mov r11, rdx ; high half becomes the low accumulator word of the next column
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10 ; fresh top word: r10:r12:r11 is this column's 3-word accumulator
|
|
|
- add r11, rax ; low half of the product into the column word
|
|
|
- adc r12, rdx ; high half plus carry into the next word
|
|
|
- adc r10, 0 ; carry out of that into the top word
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+8], r11 ; result word 1, staged; r11 is reused as the top word of the next column
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+16], r12 ; result word 2, staged
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+24], r10 ; result word 3, staged
|
|
|
- ; A[0] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+32], r11 ; result word 4, staged
|
|
|
- ; A[0] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+40], r12 ; result word 5, staged
|
|
|
- ; A[0] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+48], r10 ; result word 6, staged
|
|
|
- ; A[0] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+56], r11 ; result word 7, staged
|
|
|
- ; A[0] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+64], r12 ; result word 8, staged
|
|
|
- ; A[0] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+72], r10 ; result word 9, staged
|
|
|
- ; A[0] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+80], r11 ; result word 10, staged
|
|
|
- ; A[0] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+88], r12 ; result word 11, the last staged word
|
|
|
- ; A[1] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+96], r10 ; result word 12, the first word stored directly to r
|
|
|
- ; A[2] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+104], r11 ; result word 13
|
|
|
- ; A[3] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+112], r12 ; result word 14
|
|
|
- ; A[4] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+120], r10 ; result word 15
|
|
|
- ; A[5] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+128], r11 ; result word 16
|
|
|
- ; A[6] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+136], r12 ; result word 17
|
|
|
- ; A[7] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+144], r10 ; result word 18
|
|
|
- ; A[8] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+152], r11 ; result word 19
|
|
|
- ; A[9] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+160], r12 ; result word 20
|
|
|
- ; A[10] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+168], r10 ; result word 21
|
|
|
- ; A[11] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx ; no third accumulator word: a 12 x 12 word product fits in 24 words, so nothing carries out of word 23
|
|
|
- mov QWORD PTR [rcx+176], r11 ; result word 22
|
|
|
- mov QWORD PTR [rcx+184], r12 ; result word 23
|
|
|
- mov rax, QWORD PTR [rsp] ; all products are done: copy staged words 0-11 from the stack to r, four at a time
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r10, QWORD PTR [rsp+48]
|
|
|
- mov r11, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov rdx, QWORD PTR [rsp+72]
|
|
|
- mov r10, QWORD PTR [rsp+80]
|
|
|
- mov r11, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], rdx
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- add rsp, 96 ; release the 96-byte scratch area
|
|
|
- pop r12 ; restore the r12 value saved on entry
|
|
|
- ret
|
|
|
-sp_3072_mul_12 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r Result of multiplication.
|
|
|
-; * a First number to multiply.
|
|
|
-; * b Second number to multiply.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_avx2_12 PROC
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- mov rbp, r8
|
|
|
- mov r8, rcx
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 96
|
|
|
- cmp r9, r8
|
|
|
- mov rbx, rsp
|
|
|
- cmovne rbx, r8
|
|
|
- cmp rbp, r8
|
|
|
- cmove rbx, rsp
|
|
|
- add r8, 96
|
|
|
- xor r14, r14
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- ; A[0] * B[0]
|
|
|
- mulx r11, r10, QWORD PTR [rbp]
|
|
|
- ; A[0] * B[1]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx], r10
|
|
|
- adcx r11, rax
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- ; A[0] * B[2]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r12, rax
|
|
|
- ; A[0] * B[3]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r10, rax
|
|
|
- mov QWORD PTR [rbx+24], r10
|
|
|
- ; A[0] * B[4]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- ; A[0] * B[5]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+32], r11
|
|
|
- adcx r12, rax
|
|
|
- mov QWORD PTR [rbx+40], r12
|
|
|
- ; A[0] * B[6]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r10, rax
|
|
|
- ; A[0] * B[7]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+48], r10
|
|
|
- adcx r11, rax
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- ; A[0] * B[8]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- ; A[0] * B[9]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- adcx r10, rax
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- ; A[0] * B[10]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r11, rax
|
|
|
- ; A[0] * B[11]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adcx r10, r14
|
|
|
- mov r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mov r11, QWORD PTR [rbx+8]
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r10, QWORD PTR [rbx+24]
|
|
|
- ; A[1] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- mov r11, QWORD PTR [rbx+32]
|
|
|
- mov r12, QWORD PTR [rbx+40]
|
|
|
- ; A[1] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+24], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+32], r11
|
|
|
- mov r10, QWORD PTR [rbx+48]
|
|
|
- mov r11, QWORD PTR [rbx+56]
|
|
|
- ; A[1] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[1] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+40], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+48], r10
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- ; A[1] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- ; A[1] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[1] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[1] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- mov r11, r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r10, QWORD PTR [rbx+24]
|
|
|
- mov r11, QWORD PTR [rbx+32]
|
|
|
- ; A[2] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[2] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+24], r10
|
|
|
- mov r12, QWORD PTR [rbx+40]
|
|
|
- mov r10, QWORD PTR [rbx+48]
|
|
|
- ; A[2] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+32], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+40], r12
|
|
|
- mov r11, QWORD PTR [rbx+56]
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- ; A[2] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[2] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+48], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- ; A[2] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[2] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[2] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[2] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[2] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, r14
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mov r10, QWORD PTR [rbx+24]
|
|
|
- mov r11, QWORD PTR [rbx+32]
|
|
|
- mov r12, QWORD PTR [rbx+40]
|
|
|
- ; A[3] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+24], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+32], r11
|
|
|
- mov r10, QWORD PTR [rbx+48]
|
|
|
- mov r11, QWORD PTR [rbx+56]
|
|
|
- ; A[3] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+40], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+48], r10
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- ; A[3] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[3] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- ; A[3] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[3] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- ; A[3] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[3] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, r14
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mov r11, QWORD PTR [rbx+32]
|
|
|
- mov r12, QWORD PTR [rbx+40]
|
|
|
- mov r10, QWORD PTR [rbx+48]
|
|
|
- ; A[4] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[4] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+32], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+40], r12
|
|
|
- mov r11, QWORD PTR [rbx+56]
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- ; A[4] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+48], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- ; A[4] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[4] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[4] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- ; A[4] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[4] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mov r12, QWORD PTR [rbx+40]
|
|
|
- mov r10, QWORD PTR [rbx+48]
|
|
|
- mov r11, QWORD PTR [rbx+56]
|
|
|
- ; A[5] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+40], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+48], r10
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- ; A[5] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- ; A[5] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[5] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[5] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[5] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r12, r14
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mov r10, QWORD PTR [rbx+48]
|
|
|
- mov r11, QWORD PTR [rbx+56]
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- ; A[6] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+48], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- ; A[6] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[6] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- ; A[6] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[6] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[6] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r10, r14
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mov r11, QWORD PTR [rbx+56]
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- ; A[7] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+56], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- ; A[7] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[7] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[7] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[7] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[7] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[7] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- ; A[7] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov r11, r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mov r12, QWORD PTR [rbx+64]
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- ; A[8] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+64], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[8] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- ; A[8] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[8] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- ; A[8] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[8] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov r12, r14
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov QWORD PTR [r8+64], r12
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mov r10, QWORD PTR [rbx+72]
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- ; A[9] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[9] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[9] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[9] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[9] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[9] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov r12, QWORD PTR [r8+64]
|
|
|
- ; A[9] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[9] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov r10, r14
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+64], r12
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mov r11, QWORD PTR [rbx+80]
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[10] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- ; A[10] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[10] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- ; A[10] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- mov r12, QWORD PTR [r8+64]
|
|
|
- ; A[10] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[10] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+64], r12
|
|
|
- mov r11, r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r14
|
|
|
- adox r13, r14
|
|
|
- adcx r13, r14
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mov r12, QWORD PTR [rbx+88]
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[11] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+88], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[11] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[11] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[11] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov r12, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[11] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+64], r12
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- ; A[11] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov r12, r14
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r13
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- sub r8, 96
|
|
|
- cmp r9, r8
|
|
|
- je L_start_3072_mul_avx2_12
|
|
|
- cmp rbp, r8
|
|
|
- jne L_end_3072_mul_avx2_12
|
|
|
-L_start_3072_mul_avx2_12:
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx]
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+32]
|
|
|
- vmovups OWORD PTR [r8+32], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+48]
|
|
|
- vmovups OWORD PTR [r8+48], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+64]
|
|
|
- vmovups OWORD PTR [r8+64], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+80]
|
|
|
- vmovups OWORD PTR [r8+80], xmm0
|
|
|
-L_end_3072_mul_avx2_12:
|
|
|
- add rsp, 96
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- ret
|
|
|
-sp_3072_mul_avx2_12 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Add b to a into r. (r = a + b) Fully unrolled over 12 64-bit words (byte offsets 0..88).
|
|
|
-; * rax returns the carry out of the top word (0 or 1).
|
|
|
-; * r A single precision integer. Result; stored through rcx.
|
|
|
-; * a A single precision integer. First operand; loaded through rdx.
|
|
|
-; * b A single precision integer. Second operand; added from memory through r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_add_12 PROC
|
|
|
- ; Add: one add followed by eleven adc, lowest word first
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- xor rax, rax ; rax = 0; done ahead of the add so it cannot disturb the carry chain
|
|
|
- add r9, QWORD PTR [r8] ; word 0 starts the carry chain
|
|
|
- mov r10, QWORD PTR [rdx+8] ; mov does not modify flags, so loads/stores may sit between the adc's
|
|
|
- mov QWORD PTR [rcx], r9 ; r9 and r10 alternate as the working register
|
|
|
- adc r10, QWORD PTR [r8+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- adc r10, QWORD PTR [r8+88] ; top word (index 11)
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- adc rax, 0 ; rax = carry out of the top word
|
|
|
- ret
|
|
|
-sp_3072_add_12 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into a. (a -= b) Fully unrolled over 24 64-bit words (byte offsets 0..184).
|
|
|
-; * rax returns 0 when the top word does not borrow, all ones (-1) when it does.
|
|
|
-; * a A single precision integer and result. Read and overwritten through rcx.
|
|
|
-; * b A single precision integer. Subtracted from memory through rdx.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sub_in_place_24 PROC
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- sub r8, QWORD PTR [rdx] ; word 0 starts the borrow chain
|
|
|
- mov r9, QWORD PTR [rcx+8] ; mov does not modify flags, so loads/stores may sit between the sbb's
|
|
|
- mov QWORD PTR [rcx], r8 ; r8 and r9 alternate as the working register
|
|
|
- sbb r9, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb r9, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb r9, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb r9, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb r9, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb r9, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb r9, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb r9, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov r9, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- sbb r9, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], r9
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov r9, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- sbb r9, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], r9
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov r9, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- sbb r9, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], r9
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov r9, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- sbb r9, QWORD PTR [rdx+184] ; top word (index 23)
|
|
|
- mov QWORD PTR [rcx+184], r9
|
|
|
- sbb rax, rax ; rax = 0 - CF whatever rax held before: 0 if no borrow, -1 if the top word borrowed
|
|
|
- ret
|
|
|
-sp_3072_sub_in_place_24 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Add b to a into r. (r = a + b) Fully unrolled over 24 64-bit words (byte offsets 0..184).
|
|
|
-; * rax returns the carry out of the top word (0 or 1).
|
|
|
-; * r A single precision integer. Result; stored through rcx.
|
|
|
-; * a A single precision integer. First operand; loaded through rdx.
|
|
|
-; * b A single precision integer. Second operand; added from memory through r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_add_24 PROC
|
|
|
- ; Add: one add followed by twenty-three adc, lowest word first
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- xor rax, rax ; rax = 0; done ahead of the add so it cannot disturb the carry chain
|
|
|
- add r9, QWORD PTR [r8] ; word 0 starts the carry chain
|
|
|
- mov r10, QWORD PTR [rdx+8] ; mov does not modify flags, so loads/stores may sit between the adc's
|
|
|
- mov QWORD PTR [rcx], r9 ; r9 and r10 alternate as the working register
|
|
|
- adc r10, QWORD PTR [r8+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- adc r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- adc r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- adc r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- adc r10, QWORD PTR [r8+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r9, QWORD PTR [r8+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [rcx+128], r9
|
|
|
- adc r10, QWORD PTR [r8+136]
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [rcx+136], r10
|
|
|
- adc r9, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- adc r10, QWORD PTR [r8+152]
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- adc r9, QWORD PTR [r8+160]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [rcx+160], r9
|
|
|
- adc r10, QWORD PTR [r8+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r9, QWORD PTR [r8+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [rcx+176], r9
|
|
|
- adc r10, QWORD PTR [r8+184] ; top word (index 23)
|
|
|
- mov QWORD PTR [rcx+184], r10
|
|
|
- adc rax, 0 ; rax = carry out of the top word
|
|
|
- ret
|
|
|
-sp_3072_add_24 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_24 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 616
|
|
|
- mov QWORD PTR [rsp+576], rcx
|
|
|
- mov QWORD PTR [rsp+584], rdx
|
|
|
- mov QWORD PTR [rsp+592], r8
|
|
|
- lea r12, QWORD PTR [rsp+384]
|
|
|
- lea r14, QWORD PTR [rdx+96]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+600], r15
|
|
|
- lea r13, QWORD PTR [rsp+480]
|
|
|
- lea r14, QWORD PTR [r8+96]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rdi, 0
|
|
|
- mov QWORD PTR [rsp+608], rdi
|
|
|
- mov r8, r13
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp
|
|
|
- call sp_3072_mul_12
|
|
|
- mov r8, QWORD PTR [rsp+592]
|
|
|
- mov rdx, QWORD PTR [rsp+584]
|
|
|
- lea rcx, QWORD PTR [rsp+192]
|
|
|
- add r8, 96
|
|
|
- add rdx, 96
|
|
|
- call sp_3072_mul_12
|
|
|
- mov r8, QWORD PTR [rsp+592]
|
|
|
- mov rdx, QWORD PTR [rsp+584]
|
|
|
- mov rcx, QWORD PTR [rsp+576]
|
|
|
- call sp_3072_mul_12
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+592]
|
|
|
- mov rdx, QWORD PTR [rsp+584]
|
|
|
- mov rcx, QWORD PTR [rsp+576]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+600]
|
|
|
- mov rdi, QWORD PTR [rsp+608]
|
|
|
- mov rsi, QWORD PTR [rsp+576]
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+384]
|
|
|
- lea r13, QWORD PTR [rsp+480]
|
|
|
- and r11, rdi
|
|
|
- neg r15
|
|
|
- neg rdi
|
|
|
- add rsi, 192
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- mov QWORD PTR [r13], r9
|
|
|
- mov rax, QWORD PTR [r12+8]
|
|
|
- mov r9, QWORD PTR [r13+8]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+8], rax
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- mov rax, QWORD PTR [r12+16]
|
|
|
- mov r9, QWORD PTR [r13+16]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+16], rax
|
|
|
- mov QWORD PTR [r13+16], r9
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- mov QWORD PTR [r13+24], r9
|
|
|
- mov rax, QWORD PTR [r12+32]
|
|
|
- mov r9, QWORD PTR [r13+32]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+32], rax
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- mov rax, QWORD PTR [r12+40]
|
|
|
- mov r9, QWORD PTR [r13+40]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+40], rax
|
|
|
- mov QWORD PTR [r13+40], r9
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- mov QWORD PTR [r13+48], r9
|
|
|
- mov rax, QWORD PTR [r12+56]
|
|
|
- mov r9, QWORD PTR [r13+56]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+56], rax
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- mov rax, QWORD PTR [r12+64]
|
|
|
- mov r9, QWORD PTR [r13+64]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+64], rax
|
|
|
- mov QWORD PTR [r13+64], r9
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- mov QWORD PTR [r13+72], r9
|
|
|
- mov rax, QWORD PTR [r12+80]
|
|
|
- mov r9, QWORD PTR [r13+80]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+80], rax
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- mov rax, QWORD PTR [r12+88]
|
|
|
- mov r9, QWORD PTR [r13+88]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+88], rax
|
|
|
- mov QWORD PTR [r13+88], r9
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc r11, 0
|
|
|
- lea r13, QWORD PTR [rsp+192]
|
|
|
- mov r12, rsp
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb r11, 0
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb r11, 0
|
|
|
- sub rsi, 96
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+288], r11
|
|
|
- add rsi, 96
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- ; Add to zero
|
|
|
- mov rax, QWORD PTR [r13+104]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+112]
|
|
|
- mov QWORD PTR [rsi+104], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+120]
|
|
|
- mov QWORD PTR [rsi+112], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+128]
|
|
|
- mov QWORD PTR [rsi+120], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+136]
|
|
|
- mov QWORD PTR [rsi+128], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+144]
|
|
|
- mov QWORD PTR [rsi+136], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+152]
|
|
|
- mov QWORD PTR [rsi+144], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+160]
|
|
|
- mov QWORD PTR [rsi+152], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+168]
|
|
|
- mov QWORD PTR [rsi+160], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+176]
|
|
|
- mov QWORD PTR [rsi+168], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+184]
|
|
|
- mov QWORD PTR [rsi+176], rax
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsi+184], r9
|
|
|
- add rsp, 616
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mul_24 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; * Each 24-word operand is split into two 12-word (96-byte) halves; three calls to sp_3072_mul_avx2_12 (defined outside this section, presumably the 12-word multiply) are combined with add/subtract carry chains.
|
|
|
-; * r A single precision integer. Arrives in rcx; this routine itself stores to byte offsets 96..383 of it and hands the base pointer to the third helper call.
|
|
|
-; * a A single precision integer. Arrives in rdx; byte offsets 0..191 are read here.
|
|
|
-; * b A single precision integer. Arrives in r8; byte offsets 0..191 are read here.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_avx2_24 PROC
|
|
|
- push r12 ; r12-r15, rdi and rsi are all overwritten below; saved here, popped in reverse order before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 616 ; frame: [rsp+0] and [rsp+192] helper outputs, [rsp+384]/[rsp+480] half-sums, [rsp+576..608] saved args and carries
|
|
|
- mov QWORD PTR [rsp+576], rcx ; save r
|
|
|
- mov QWORD PTR [rsp+584], rdx ; save a
|
|
|
- mov QWORD PTR [rsp+592], r8 ; save b
|
|
|
- lea r12, QWORD PTR [rsp+384] ; r12 -> destination of the a half-sum
|
|
|
- lea r14, QWORD PTR [rdx+96] ; r14 -> upper 12 words of a
|
|
|
- ; Add the lower and upper 12-word halves of a, storing the 12-word sum at [r12]
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15 ; r15 = 0; it receives the carry out of this sum (xor runs before the add, so its flag write is harmless)
|
|
|
- add rax, QWORD PTR [r14] ; starts the carry chain; only mov instructions sit between the adc steps
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc r15, 0 ; r15 = carry out of the a half-sum (0 or 1)
|
|
|
- mov QWORD PTR [rsp+600], r15 ; kept in the frame across the calls
|
|
|
- lea r13, QWORD PTR [rsp+480] ; r13 -> destination of the b half-sum
|
|
|
- lea r14, QWORD PTR [r8+96] ; r14 -> upper 12 words of b
|
|
|
- ; Add the lower and upper 12-word halves of b, storing the 12-word sum at [r13]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi ; rdi = 0; it receives the carry out of this sum
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rdi, 0 ; rdi = carry out of the b half-sum (0 or 1)
|
|
|
- mov QWORD PTR [rsp+608], rdi ; kept in the frame across the calls
|
|
|
- mov r8, r13 ; first call: operands are the two half-sums
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp ; output goes to [rsp+0]. NOTE(review): no separate 32-byte home space is reserved below it - relies on the helper never spilling there; confirm
|
|
|
- call sp_3072_mul_avx2_12
|
|
|
- mov r8, QWORD PTR [rsp+592] ; second call: operands are the upper halves (b+96, a+96)
|
|
|
- mov rdx, QWORD PTR [rsp+584]
|
|
|
- lea rcx, QWORD PTR [rsp+192] ; output goes to [rsp+192]
|
|
|
- add r8, 96
|
|
|
- add rdx, 96
|
|
|
- call sp_3072_mul_avx2_12
|
|
|
- mov r8, QWORD PTR [rsp+592] ; third call: operands are the lower halves, output goes straight to r
|
|
|
- mov rdx, QWORD PTR [rsp+584]
|
|
|
- mov rcx, QWORD PTR [rsp+576]
|
|
|
- call sp_3072_mul_avx2_12
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+592] ; reload b, a and r from the frame after the call (rcx is dereferenced again below)
|
|
|
- mov rdx, QWORD PTR [rsp+584]
|
|
|
- mov rcx, QWORD PTR [rsp+576]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+600] ; r15 = carry of the a half-sum
|
|
|
- mov rdi, QWORD PTR [rsp+608] ; rdi = carry of the b half-sum
|
|
|
- mov rsi, QWORD PTR [rsp+576] ; rsi = r
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+384] ; r12 -> a half-sum
|
|
|
- lea r13, QWORD PTR [rsp+480] ; r13 -> b half-sum
|
|
|
- and r11, rdi ; r11 = 1 only when both half-sums carried; it accumulates the top carry word from here on
|
|
|
- neg r15 ; carry 0/1 becomes mask 0 / all ones
|
|
|
- neg rdi ; carry 0/1 becomes mask 0 / all ones
|
|
|
- add rsi, 192 ; rsi -> r + 192 bytes
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- pext rax, rax, rdi ; with an all-ones mask pext returns the source, with a zero mask it returns 0: keeps the a-sum word only if the b-sum carried
|
|
|
- pext r9, r9, r15 ; keeps the b-sum word only if the a-sum carried
|
|
|
- add rax, r9 ; starts a 12-word carry chain; the interleaved mov/pext steps leave CF alone
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [r13+8]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [r13+16]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [r13+32]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [r13+40]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [r13+56]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [r13+64]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [r13+80]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [r13+88]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, rax
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc r11, 0 ; fold the carry out of the masked sum into the top carry word
|
|
|
- lea r13, QWORD PTR [rsp+192] ; r13 -> output of the second call (upper-halves product)
|
|
|
- mov r12, rsp ; r12 -> output of the first call (half-sums product)
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [r13] ; in-place 24-word subtract: [r12] -= [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb r11, 0 ; the borrow out of the 24-word subtract comes off the top carry word
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [rcx] ; second in-place 24-word subtract: [r12] -= first 24 words at r (output of the third call)
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb r11, 0 ; borrow out of the second subtract also comes off the top carry word
|
|
|
- sub rsi, 96 ; rsi -> r + 96 bytes
|
|
|
- ; Add the 24-word difference at [r12] into r starting at byte offset 96
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc r11, 0 ; fold the carry out of this add into the top carry word
|
|
|
- mov QWORD PTR [rcx+288], r11 ; store the accumulated carry word at byte offset 288 of r (first store to that word in this routine)
|
|
|
- add rsi, 96 ; rsi -> r + 192 bytes again
|
|
|
- ; Add the first 13 words at [r13] (upper-halves product) into r starting at byte offset 192; the 13th lands on the carry word just stored
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- ; Add to zero: the remaining 11 words at [r13] plus the running carry are stored to [rsi+104..184] without reading r
|
|
|
- mov rax, QWORD PTR [r13+104]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+112]
|
|
|
- mov QWORD PTR [rsi+104], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+120]
|
|
|
- mov QWORD PTR [rsi+112], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+128]
|
|
|
- mov QWORD PTR [rsi+120], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+136]
|
|
|
- mov QWORD PTR [rsi+128], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+144]
|
|
|
- mov QWORD PTR [rsi+136], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+152]
|
|
|
- mov QWORD PTR [rsi+144], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+160]
|
|
|
- mov QWORD PTR [rsi+152], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+168]
|
|
|
- mov QWORD PTR [rsi+160], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+176]
|
|
|
- mov QWORD PTR [rsi+168], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+184]
|
|
|
- mov QWORD PTR [rsi+176], rax
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsi+184], r9
|
|
|
- add rsp, 616 ; release the scratch frame
|
|
|
- pop rsi ; restore saved registers in reverse push order
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mul_avx2_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Subtract b from a in place (a -= b) over 48 64-bit words (byte offsets 0..376).
|
|
|
-; * Pointers arrive in the Windows x64 argument registers: rcx = a, rdx = b. Scratch: r8, r9, flags.
|
|
|
-; * a A single precision integer and result (read and written through rcx).
|
|
|
-; * b A single precision integer (read only, through rdx).
|
|
|
-; * Returns rax = 0 when the last word produced no borrow, all ones (-1) when it did. */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sub_in_place_48 PROC
|
|
|
- mov r8, QWORD PTR [rcx] ; r8 = a[0]
|
|
|
- sub r8, QWORD PTR [rdx] ; word 0: plain sub starts the borrow chain (CF = borrow)
|
|
|
- mov r9, QWORD PTR [rcx+8] ; load a[1] before storing word 0; mov does not modify CF
|
|
|
- mov QWORD PTR [rcx], r8 ; a[0] = result word 0
|
|
|
- sbb r9, QWORD PTR [rdx+8] ; word 1 minus b[1] minus borrow; pattern repeats, alternating r8/r9
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb r9, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb r9, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb r9, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb r9, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb r9, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb r9, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb r9, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov r9, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- sbb r9, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], r9
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov r9, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- sbb r9, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], r9
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov r9, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- sbb r9, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], r9
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov r9, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- sbb r9, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rcx+184], r9
|
|
|
- sbb r8, QWORD PTR [rdx+192]
|
|
|
- mov r9, QWORD PTR [rcx+200]
|
|
|
- mov QWORD PTR [rcx+192], r8
|
|
|
- sbb r9, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rcx+200], r9
|
|
|
- sbb r8, QWORD PTR [rdx+208]
|
|
|
- mov r9, QWORD PTR [rcx+216]
|
|
|
- mov QWORD PTR [rcx+208], r8
|
|
|
- sbb r9, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rcx+216], r9
|
|
|
- sbb r8, QWORD PTR [rdx+224]
|
|
|
- mov r9, QWORD PTR [rcx+232]
|
|
|
- mov QWORD PTR [rcx+224], r8
|
|
|
- sbb r9, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rcx+232], r9
|
|
|
- sbb r8, QWORD PTR [rdx+240]
|
|
|
- mov r9, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [rcx+240], r8
|
|
|
- sbb r9, QWORD PTR [rdx+248]
|
|
|
- mov r8, QWORD PTR [rcx+256]
|
|
|
- mov QWORD PTR [rcx+248], r9
|
|
|
- sbb r8, QWORD PTR [rdx+256]
|
|
|
- mov r9, QWORD PTR [rcx+264]
|
|
|
- mov QWORD PTR [rcx+256], r8
|
|
|
- sbb r9, QWORD PTR [rdx+264]
|
|
|
- mov r8, QWORD PTR [rcx+272]
|
|
|
- mov QWORD PTR [rcx+264], r9
|
|
|
- sbb r8, QWORD PTR [rdx+272]
|
|
|
- mov r9, QWORD PTR [rcx+280]
|
|
|
- mov QWORD PTR [rcx+272], r8
|
|
|
- sbb r9, QWORD PTR [rdx+280]
|
|
|
- mov r8, QWORD PTR [rcx+288]
|
|
|
- mov QWORD PTR [rcx+280], r9
|
|
|
- sbb r8, QWORD PTR [rdx+288]
|
|
|
- mov r9, QWORD PTR [rcx+296]
|
|
|
- mov QWORD PTR [rcx+288], r8
|
|
|
- sbb r9, QWORD PTR [rdx+296]
|
|
|
- mov r8, QWORD PTR [rcx+304]
|
|
|
- mov QWORD PTR [rcx+296], r9
|
|
|
- sbb r8, QWORD PTR [rdx+304]
|
|
|
- mov r9, QWORD PTR [rcx+312]
|
|
|
- mov QWORD PTR [rcx+304], r8
|
|
|
- sbb r9, QWORD PTR [rdx+312]
|
|
|
- mov r8, QWORD PTR [rcx+320]
|
|
|
- mov QWORD PTR [rcx+312], r9
|
|
|
- sbb r8, QWORD PTR [rdx+320]
|
|
|
- mov r9, QWORD PTR [rcx+328]
|
|
|
- mov QWORD PTR [rcx+320], r8
|
|
|
- sbb r9, QWORD PTR [rdx+328]
|
|
|
- mov r8, QWORD PTR [rcx+336]
|
|
|
- mov QWORD PTR [rcx+328], r9
|
|
|
- sbb r8, QWORD PTR [rdx+336]
|
|
|
- mov r9, QWORD PTR [rcx+344]
|
|
|
- mov QWORD PTR [rcx+336], r8
|
|
|
- sbb r9, QWORD PTR [rdx+344]
|
|
|
- mov r8, QWORD PTR [rcx+352]
|
|
|
- mov QWORD PTR [rcx+344], r9
|
|
|
- sbb r8, QWORD PTR [rdx+352]
|
|
|
- mov r9, QWORD PTR [rcx+360]
|
|
|
- mov QWORD PTR [rcx+352], r8
|
|
|
- sbb r9, QWORD PTR [rdx+360]
|
|
|
- mov r8, QWORD PTR [rcx+368]
|
|
|
- mov QWORD PTR [rcx+360], r9
|
|
|
- sbb r8, QWORD PTR [rdx+368]
|
|
|
- mov r9, QWORD PTR [rcx+376]
|
|
|
- mov QWORD PTR [rcx+368], r8
|
|
|
- sbb r9, QWORD PTR [rdx+376]
|
|
|
- mov QWORD PTR [rcx+376], r9 ; store the last (48th) result word; CF still holds the final borrow
|
|
|
- sbb rax, rax ; rax = rax - rax - CF: 0 if no borrow out, -1 (all ones) otherwise
|
|
|
- ret
|
|
|
-sp_3072_sub_in_place_48 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Add b to a into r (r = a + b) over 48 64-bit words (byte offsets 0..376).
|
|
|
-; * Pointers arrive in the Windows x64 argument registers: rcx = r, rdx = a, r8 = b. Scratch: r9, r10, flags. Returns rax = carry out of the last word (0 or 1).
|
|
|
-; * r A single precision integer (result, written through rcx).
|
|
|
-; * a A single precision integer (read only, through rdx).
|
|
|
-; * b A single precision integer (read only, through r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_add_48 PROC
|
|
|
- ; Add: word-by-word addition with the carry propagated through CF
|
|
|
- mov r9, QWORD PTR [rdx] ; r9 = a[0]
|
|
|
- xor rax, rax ; zero the return value now: xor alters flags, so it must precede the carry chain
|
|
|
- add r9, QWORD PTR [r8] ; word 0: plain add starts the carry chain (CF = carry)
|
|
|
- mov r10, QWORD PTR [rdx+8] ; load a[1] before storing word 0; mov does not modify CF
|
|
|
- mov QWORD PTR [rcx], r9 ; r[0] = result word 0
|
|
|
- adc r10, QWORD PTR [r8+8] ; word 1 plus b[1] plus carry; pattern repeats, alternating r9/r10
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- adc r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- adc r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- adc r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- adc r10, QWORD PTR [r8+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r9, QWORD PTR [r8+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [rcx+128], r9
|
|
|
- adc r10, QWORD PTR [r8+136]
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [rcx+136], r10
|
|
|
- adc r9, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- adc r10, QWORD PTR [r8+152]
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- adc r9, QWORD PTR [r8+160]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [rcx+160], r9
|
|
|
- adc r10, QWORD PTR [r8+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r9, QWORD PTR [r8+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [rcx+176], r9
|
|
|
- adc r10, QWORD PTR [r8+184]
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [rcx+184], r10
|
|
|
- adc r9, QWORD PTR [r8+192]
|
|
|
- mov r10, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [rcx+192], r9
|
|
|
- adc r10, QWORD PTR [r8+200]
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [rcx+200], r10
|
|
|
- adc r9, QWORD PTR [r8+208]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [rcx+208], r9
|
|
|
- adc r10, QWORD PTR [r8+216]
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- adc r9, QWORD PTR [r8+224]
|
|
|
- mov r10, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [rcx+224], r9
|
|
|
- adc r10, QWORD PTR [r8+232]
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [rcx+232], r10
|
|
|
- adc r9, QWORD PTR [r8+240]
|
|
|
- mov r10, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [rcx+240], r9
|
|
|
- adc r10, QWORD PTR [r8+248]
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- mov QWORD PTR [rcx+248], r10
|
|
|
- adc r9, QWORD PTR [r8+256]
|
|
|
- mov r10, QWORD PTR [rdx+264]
|
|
|
- mov QWORD PTR [rcx+256], r9
|
|
|
- adc r10, QWORD PTR [r8+264]
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- mov QWORD PTR [rcx+264], r10
|
|
|
- adc r9, QWORD PTR [r8+272]
|
|
|
- mov r10, QWORD PTR [rdx+280]
|
|
|
- mov QWORD PTR [rcx+272], r9
|
|
|
- adc r10, QWORD PTR [r8+280]
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- mov QWORD PTR [rcx+280], r10
|
|
|
- adc r9, QWORD PTR [r8+288]
|
|
|
- mov r10, QWORD PTR [rdx+296]
|
|
|
- mov QWORD PTR [rcx+288], r9
|
|
|
- adc r10, QWORD PTR [r8+296]
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- mov QWORD PTR [rcx+296], r10
|
|
|
- adc r9, QWORD PTR [r8+304]
|
|
|
- mov r10, QWORD PTR [rdx+312]
|
|
|
- mov QWORD PTR [rcx+304], r9
|
|
|
- adc r10, QWORD PTR [r8+312]
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- mov QWORD PTR [rcx+312], r10
|
|
|
- adc r9, QWORD PTR [r8+320]
|
|
|
- mov r10, QWORD PTR [rdx+328]
|
|
|
- mov QWORD PTR [rcx+320], r9
|
|
|
- adc r10, QWORD PTR [r8+328]
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- mov QWORD PTR [rcx+328], r10
|
|
|
- adc r9, QWORD PTR [r8+336]
|
|
|
- mov r10, QWORD PTR [rdx+344]
|
|
|
- mov QWORD PTR [rcx+336], r9
|
|
|
- adc r10, QWORD PTR [r8+344]
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- mov QWORD PTR [rcx+344], r10
|
|
|
- adc r9, QWORD PTR [r8+352]
|
|
|
- mov r10, QWORD PTR [rdx+360]
|
|
|
- mov QWORD PTR [rcx+352], r9
|
|
|
- adc r10, QWORD PTR [r8+360]
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- mov QWORD PTR [rcx+360], r10
|
|
|
- adc r9, QWORD PTR [r8+368]
|
|
|
- mov r10, QWORD PTR [rdx+376]
|
|
|
- mov QWORD PTR [rcx+368], r9
|
|
|
- adc r10, QWORD PTR [r8+376]
|
|
|
- mov QWORD PTR [rcx+376], r10 ; store the last (48th) result word; CF still holds the final carry
|
|
|
- adc rax, 0 ; rax was zeroed above, so rax = carry out (0 or 1)
|
|
|
- ret
|
|
|
-sp_3072_add_48 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_48 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 1192
|
|
|
- mov QWORD PTR [rsp+1152], rcx
|
|
|
- mov QWORD PTR [rsp+1160], rdx
|
|
|
- mov QWORD PTR [rsp+1168], r8
|
|
|
- lea r12, QWORD PTR [rsp+768]
|
|
|
- lea r14, QWORD PTR [rdx+192]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+1176], r15
|
|
|
- lea r13, QWORD PTR [rsp+960]
|
|
|
- lea r14, QWORD PTR [r8+192]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [r8+104]
|
|
|
- mov QWORD PTR [r13+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [r13+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [r8+128]
|
|
|
- mov QWORD PTR [r13+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [r8+136]
|
|
|
- mov QWORD PTR [r13+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [r8+144]
|
|
|
- mov QWORD PTR [r13+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [r8+152]
|
|
|
- mov QWORD PTR [r13+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov QWORD PTR [r13+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [r8+168]
|
|
|
- mov QWORD PTR [r13+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [r8+176]
|
|
|
- mov QWORD PTR [r13+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [r8+184]
|
|
|
- mov QWORD PTR [r13+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov QWORD PTR [r13+184], r10
|
|
|
- adc rdi, 0
|
|
|
- mov QWORD PTR [rsp+1184], rdi
|
|
|
- mov r8, r13
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp
|
|
|
- call sp_3072_mul_24
|
|
|
- mov r8, QWORD PTR [rsp+1168]
|
|
|
- mov rdx, QWORD PTR [rsp+1160]
|
|
|
- lea rcx, QWORD PTR [rsp+384]
|
|
|
- add r8, 192
|
|
|
- add rdx, 192
|
|
|
- call sp_3072_mul_24
|
|
|
- mov r8, QWORD PTR [rsp+1168]
|
|
|
- mov rdx, QWORD PTR [rsp+1160]
|
|
|
- mov rcx, QWORD PTR [rsp+1152]
|
|
|
- call sp_3072_mul_24
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+1168]
|
|
|
- mov rdx, QWORD PTR [rsp+1160]
|
|
|
- mov rcx, QWORD PTR [rsp+1152]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+1176]
|
|
|
- mov rdi, QWORD PTR [rsp+1184]
|
|
|
- mov rsi, QWORD PTR [rsp+1152]
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+768]
|
|
|
- lea r13, QWORD PTR [rsp+960]
|
|
|
- and r11, rdi
|
|
|
- neg r15
|
|
|
- neg rdi
|
|
|
- add rsi, 384
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- mov QWORD PTR [r13], r9
|
|
|
- mov rax, QWORD PTR [r12+8]
|
|
|
- mov r9, QWORD PTR [r13+8]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+8], rax
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- mov rax, QWORD PTR [r12+16]
|
|
|
- mov r9, QWORD PTR [r13+16]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+16], rax
|
|
|
- mov QWORD PTR [r13+16], r9
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- mov QWORD PTR [r13+24], r9
|
|
|
- mov rax, QWORD PTR [r12+32]
|
|
|
- mov r9, QWORD PTR [r13+32]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+32], rax
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- mov rax, QWORD PTR [r12+40]
|
|
|
- mov r9, QWORD PTR [r13+40]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+40], rax
|
|
|
- mov QWORD PTR [r13+40], r9
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- mov QWORD PTR [r13+48], r9
|
|
|
- mov rax, QWORD PTR [r12+56]
|
|
|
- mov r9, QWORD PTR [r13+56]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+56], rax
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- mov rax, QWORD PTR [r12+64]
|
|
|
- mov r9, QWORD PTR [r13+64]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+64], rax
|
|
|
- mov QWORD PTR [r13+64], r9
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- mov QWORD PTR [r13+72], r9
|
|
|
- mov rax, QWORD PTR [r12+80]
|
|
|
- mov r9, QWORD PTR [r13+80]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+80], rax
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- mov rax, QWORD PTR [r12+88]
|
|
|
- mov r9, QWORD PTR [r13+88]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+88], rax
|
|
|
- mov QWORD PTR [r13+88], r9
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [r13+96]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- mov QWORD PTR [r13+96], r9
|
|
|
- mov rax, QWORD PTR [r12+104]
|
|
|
- mov r9, QWORD PTR [r13+104]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+104], rax
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- mov rax, QWORD PTR [r12+112]
|
|
|
- mov r9, QWORD PTR [r13+112]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+112], rax
|
|
|
- mov QWORD PTR [r13+112], r9
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [r13+120]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- mov QWORD PTR [r13+120], r9
|
|
|
- mov rax, QWORD PTR [r12+128]
|
|
|
- mov r9, QWORD PTR [r13+128]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+128], rax
|
|
|
- mov QWORD PTR [r13+128], r9
|
|
|
- mov rax, QWORD PTR [r12+136]
|
|
|
- mov r9, QWORD PTR [r13+136]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+136], rax
|
|
|
- mov QWORD PTR [r13+136], r9
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [r13+144]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- mov QWORD PTR [r13+144], r9
|
|
|
- mov rax, QWORD PTR [r12+152]
|
|
|
- mov r9, QWORD PTR [r13+152]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+152], rax
|
|
|
- mov QWORD PTR [r13+152], r9
|
|
|
- mov rax, QWORD PTR [r12+160]
|
|
|
- mov r9, QWORD PTR [r13+160]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+160], rax
|
|
|
- mov QWORD PTR [r13+160], r9
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [r13+168]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- mov QWORD PTR [r13+168], r9
|
|
|
- mov rax, QWORD PTR [r12+176]
|
|
|
- mov r9, QWORD PTR [r13+176]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+176], rax
|
|
|
- mov QWORD PTR [r13+176], r9
|
|
|
- mov rax, QWORD PTR [r12+184]
|
|
|
- mov r9, QWORD PTR [r13+184]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+184], rax
|
|
|
- mov QWORD PTR [r13+184], r9
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r13+184]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc r11, 0
|
|
|
- lea r13, QWORD PTR [rsp+384]
|
|
|
- mov r12, rsp
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [r13+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [r13+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [r13+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [r13+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [r13+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [r13+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [r13+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [r13+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [r13+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [r13+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [r13+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [r13+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [r13+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [r13+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [r13+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [r13+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [r13+376]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb r11, 0
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [rcx+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [rcx+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [rcx+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [rcx+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [rcx+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [rcx+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [rcx+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [rcx+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [rcx+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [rcx+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [rcx+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [rcx+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [rcx+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [rcx+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [rcx+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [rcx+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [rcx+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [rcx+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [rcx+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [rcx+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [rcx+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [rcx+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [rcx+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [rcx+376]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb r11, 0
|
|
|
- sub rsi, 192
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r12+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r12+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r12+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r12+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r12+248]
|
|
|
- mov r10, QWORD PTR [rsi+256]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r10, QWORD PTR [r12+256]
|
|
|
- mov rax, QWORD PTR [rsi+264]
|
|
|
- mov QWORD PTR [rsi+256], r10
|
|
|
- adc rax, QWORD PTR [r12+264]
|
|
|
- mov r9, QWORD PTR [rsi+272]
|
|
|
- mov QWORD PTR [rsi+264], rax
|
|
|
- adc r9, QWORD PTR [r12+272]
|
|
|
- mov r10, QWORD PTR [rsi+280]
|
|
|
- mov QWORD PTR [rsi+272], r9
|
|
|
- adc r10, QWORD PTR [r12+280]
|
|
|
- mov rax, QWORD PTR [rsi+288]
|
|
|
- mov QWORD PTR [rsi+280], r10
|
|
|
- adc rax, QWORD PTR [r12+288]
|
|
|
- mov r9, QWORD PTR [rsi+296]
|
|
|
- mov QWORD PTR [rsi+288], rax
|
|
|
- adc r9, QWORD PTR [r12+296]
|
|
|
- mov r10, QWORD PTR [rsi+304]
|
|
|
- mov QWORD PTR [rsi+296], r9
|
|
|
- adc r10, QWORD PTR [r12+304]
|
|
|
- mov rax, QWORD PTR [rsi+312]
|
|
|
- mov QWORD PTR [rsi+304], r10
|
|
|
- adc rax, QWORD PTR [r12+312]
|
|
|
- mov r9, QWORD PTR [rsi+320]
|
|
|
- mov QWORD PTR [rsi+312], rax
|
|
|
- adc r9, QWORD PTR [r12+320]
|
|
|
- mov r10, QWORD PTR [rsi+328]
|
|
|
- mov QWORD PTR [rsi+320], r9
|
|
|
- adc r10, QWORD PTR [r12+328]
|
|
|
- mov rax, QWORD PTR [rsi+336]
|
|
|
- mov QWORD PTR [rsi+328], r10
|
|
|
- adc rax, QWORD PTR [r12+336]
|
|
|
- mov r9, QWORD PTR [rsi+344]
|
|
|
- mov QWORD PTR [rsi+336], rax
|
|
|
- adc r9, QWORD PTR [r12+344]
|
|
|
- mov r10, QWORD PTR [rsi+352]
|
|
|
- mov QWORD PTR [rsi+344], r9
|
|
|
- adc r10, QWORD PTR [r12+352]
|
|
|
- mov rax, QWORD PTR [rsi+360]
|
|
|
- mov QWORD PTR [rsi+352], r10
|
|
|
- adc rax, QWORD PTR [r12+360]
|
|
|
- mov r9, QWORD PTR [rsi+368]
|
|
|
- mov QWORD PTR [rsi+360], rax
|
|
|
- adc r9, QWORD PTR [r12+368]
|
|
|
- mov r10, QWORD PTR [rsi+376]
|
|
|
- mov QWORD PTR [rsi+368], r9
|
|
|
- adc r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [rsi+376], r10
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+576], r11
|
|
|
- add rsi, 192
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r13+192]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- ; Add to zero
|
|
|
- mov rax, QWORD PTR [r13+200]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+208]
|
|
|
- mov QWORD PTR [rsi+200], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+216]
|
|
|
- mov QWORD PTR [rsi+208], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+224]
|
|
|
- mov QWORD PTR [rsi+216], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+232]
|
|
|
- mov QWORD PTR [rsi+224], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+240]
|
|
|
- mov QWORD PTR [rsi+232], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+248]
|
|
|
- mov QWORD PTR [rsi+240], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+256]
|
|
|
- mov QWORD PTR [rsi+248], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+264]
|
|
|
- mov QWORD PTR [rsi+256], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+272]
|
|
|
- mov QWORD PTR [rsi+264], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+280]
|
|
|
- mov QWORD PTR [rsi+272], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+288]
|
|
|
- mov QWORD PTR [rsi+280], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+296]
|
|
|
- mov QWORD PTR [rsi+288], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+304]
|
|
|
- mov QWORD PTR [rsi+296], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+312]
|
|
|
- mov QWORD PTR [rsi+304], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+320]
|
|
|
- mov QWORD PTR [rsi+312], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+328]
|
|
|
- mov QWORD PTR [rsi+320], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+336]
|
|
|
- mov QWORD PTR [rsi+328], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+344]
|
|
|
- mov QWORD PTR [rsi+336], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+352]
|
|
|
- mov QWORD PTR [rsi+344], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+360]
|
|
|
- mov QWORD PTR [rsi+352], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+368]
|
|
|
- mov QWORD PTR [rsi+360], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+376]
|
|
|
- mov QWORD PTR [rsi+368], rax
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsi+376], r9
|
|
|
- add rsp, 1192
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mul_48 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r   A single precision integer that receives the product a * b (pointer passed in rcx).
|
|
|
-; * a   A single precision integer, the first operand (pointer passed in rdx; 384 bytes are read).
|
|
|
-; * b   A single precision integer, the second operand (pointer passed in r8; 384 bytes are read).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_avx2_48 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 1192
|
|
|
- mov QWORD PTR [rsp+1152], rcx
|
|
|
- mov QWORD PTR [rsp+1160], rdx
|
|
|
- mov QWORD PTR [rsp+1168], r8
|
|
|
- lea r12, QWORD PTR [rsp+768]
|
|
|
- lea r14, QWORD PTR [rdx+192]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+1176], r15
|
|
|
- lea r13, QWORD PTR [rsp+960]
|
|
|
- lea r14, QWORD PTR [r8+192]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [r8+104]
|
|
|
- mov QWORD PTR [r13+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [r13+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [r8+128]
|
|
|
- mov QWORD PTR [r13+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [r8+136]
|
|
|
- mov QWORD PTR [r13+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [r8+144]
|
|
|
- mov QWORD PTR [r13+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [r8+152]
|
|
|
- mov QWORD PTR [r13+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov QWORD PTR [r13+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [r8+168]
|
|
|
- mov QWORD PTR [r13+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [r8+176]
|
|
|
- mov QWORD PTR [r13+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [r8+184]
|
|
|
- mov QWORD PTR [r13+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov QWORD PTR [r13+184], r10
|
|
|
- adc rdi, 0
|
|
|
- mov QWORD PTR [rsp+1184], rdi
|
|
|
- mov r8, r13
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp
|
|
|
- call sp_3072_mul_avx2_24
|
|
|
- mov r8, QWORD PTR [rsp+1168]
|
|
|
- mov rdx, QWORD PTR [rsp+1160]
|
|
|
- lea rcx, QWORD PTR [rsp+384]
|
|
|
- add r8, 192
|
|
|
- add rdx, 192
|
|
|
- call sp_3072_mul_avx2_24
|
|
|
- mov r8, QWORD PTR [rsp+1168]
|
|
|
- mov rdx, QWORD PTR [rsp+1160]
|
|
|
- mov rcx, QWORD PTR [rsp+1152]
|
|
|
- call sp_3072_mul_avx2_24
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+1168]
|
|
|
- mov rdx, QWORD PTR [rsp+1160]
|
|
|
- mov rcx, QWORD PTR [rsp+1152]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+1176]
|
|
|
- mov rdi, QWORD PTR [rsp+1184]
|
|
|
- mov rsi, QWORD PTR [rsp+1152]
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+768]
|
|
|
- lea r13, QWORD PTR [rsp+960]
|
|
|
- and r11, rdi
|
|
|
- neg r15
|
|
|
- neg rdi
|
|
|
- add rsi, 384
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- add rax, r9
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [r13+8]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [r13+16]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [r13+32]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [r13+40]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [r13+56]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [r13+64]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [r13+80]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [r13+88]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [r13+96]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [r13+104]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [r13+112]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [r13+120]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [r13+128]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [r13+136]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [r13+144]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [r13+152]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [r13+160]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [r13+168]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [r13+176]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [r13+184]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, rax
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc r11, 0
|
|
|
- lea r13, QWORD PTR [rsp+384]
|
|
|
- mov r12, rsp
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [r13+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [r13+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [r13+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [r13+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [r13+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [r13+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [r13+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [r13+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [r13+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [r13+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [r13+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [r13+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [r13+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [r13+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [r13+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [r13+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [r13+376]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb r11, 0
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [rcx+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [rcx+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [rcx+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [rcx+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [rcx+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [rcx+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [rcx+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [rcx+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [rcx+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [rcx+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [rcx+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [rcx+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [rcx+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [rcx+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [rcx+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [rcx+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [rcx+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [rcx+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [rcx+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [rcx+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [rcx+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [rcx+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [rcx+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [rcx+376]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb r11, 0
|
|
|
- sub rsi, 192
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r12+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r12+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r12+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r12+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r12+248]
|
|
|
- mov r10, QWORD PTR [rsi+256]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r10, QWORD PTR [r12+256]
|
|
|
- mov rax, QWORD PTR [rsi+264]
|
|
|
- mov QWORD PTR [rsi+256], r10
|
|
|
- adc rax, QWORD PTR [r12+264]
|
|
|
- mov r9, QWORD PTR [rsi+272]
|
|
|
- mov QWORD PTR [rsi+264], rax
|
|
|
- adc r9, QWORD PTR [r12+272]
|
|
|
- mov r10, QWORD PTR [rsi+280]
|
|
|
- mov QWORD PTR [rsi+272], r9
|
|
|
- adc r10, QWORD PTR [r12+280]
|
|
|
- mov rax, QWORD PTR [rsi+288]
|
|
|
- mov QWORD PTR [rsi+280], r10
|
|
|
- adc rax, QWORD PTR [r12+288]
|
|
|
- mov r9, QWORD PTR [rsi+296]
|
|
|
- mov QWORD PTR [rsi+288], rax
|
|
|
- adc r9, QWORD PTR [r12+296]
|
|
|
- mov r10, QWORD PTR [rsi+304]
|
|
|
- mov QWORD PTR [rsi+296], r9
|
|
|
- adc r10, QWORD PTR [r12+304]
|
|
|
- mov rax, QWORD PTR [rsi+312]
|
|
|
- mov QWORD PTR [rsi+304], r10
|
|
|
- adc rax, QWORD PTR [r12+312]
|
|
|
- mov r9, QWORD PTR [rsi+320]
|
|
|
- mov QWORD PTR [rsi+312], rax
|
|
|
- adc r9, QWORD PTR [r12+320]
|
|
|
- mov r10, QWORD PTR [rsi+328]
|
|
|
- mov QWORD PTR [rsi+320], r9
|
|
|
- adc r10, QWORD PTR [r12+328]
|
|
|
- mov rax, QWORD PTR [rsi+336]
|
|
|
- mov QWORD PTR [rsi+328], r10
|
|
|
- adc rax, QWORD PTR [r12+336]
|
|
|
- mov r9, QWORD PTR [rsi+344]
|
|
|
- mov QWORD PTR [rsi+336], rax
|
|
|
- adc r9, QWORD PTR [r12+344]
|
|
|
- mov r10, QWORD PTR [rsi+352]
|
|
|
- mov QWORD PTR [rsi+344], r9
|
|
|
- adc r10, QWORD PTR [r12+352]
|
|
|
- mov rax, QWORD PTR [rsi+360]
|
|
|
- mov QWORD PTR [rsi+352], r10
|
|
|
- adc rax, QWORD PTR [r12+360]
|
|
|
- mov r9, QWORD PTR [rsi+368]
|
|
|
- mov QWORD PTR [rsi+360], rax
|
|
|
- adc r9, QWORD PTR [r12+368]
|
|
|
- mov r10, QWORD PTR [rsi+376]
|
|
|
- mov QWORD PTR [rsi+368], r9
|
|
|
- adc r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [rsi+376], r10
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+576], r11
|
|
|
- add rsi, 192
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r13+192]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- ; Add to zero
|
|
|
- mov rax, QWORD PTR [r13+200]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+208]
|
|
|
- mov QWORD PTR [rsi+200], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+216]
|
|
|
- mov QWORD PTR [rsi+208], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+224]
|
|
|
- mov QWORD PTR [rsi+216], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+232]
|
|
|
- mov QWORD PTR [rsi+224], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+240]
|
|
|
- mov QWORD PTR [rsi+232], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+248]
|
|
|
- mov QWORD PTR [rsi+240], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+256]
|
|
|
- mov QWORD PTR [rsi+248], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+264]
|
|
|
- mov QWORD PTR [rsi+256], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+272]
|
|
|
- mov QWORD PTR [rsi+264], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+280]
|
|
|
- mov QWORD PTR [rsi+272], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+288]
|
|
|
- mov QWORD PTR [rsi+280], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+296]
|
|
|
- mov QWORD PTR [rsi+288], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+304]
|
|
|
- mov QWORD PTR [rsi+296], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+312]
|
|
|
- mov QWORD PTR [rsi+304], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+320]
|
|
|
- mov QWORD PTR [rsi+312], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+328]
|
|
|
- mov QWORD PTR [rsi+320], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+336]
|
|
|
- mov QWORD PTR [rsi+328], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+344]
|
|
|
- mov QWORD PTR [rsi+336], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+352]
|
|
|
- mov QWORD PTR [rsi+344], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+360]
|
|
|
- mov QWORD PTR [rsi+352], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+368]
|
|
|
- mov QWORD PTR [rsi+360], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+376]
|
|
|
- mov QWORD PTR [rsi+368], rax
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsi+376], r9
|
|
|
- add rsp, 1192
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mul_avx2_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; * Reads 12 64-bit words of a (offsets 0..88) and writes 24 words of r (offsets 0..184). Result words 0..11 are staged in a 96-byte stack buffer and copied to r last; words 12..23 are stored to r directly. NOTE(review): presumably this staging lets r alias a - confirm against callers.
|
|
|
-; * r A single precision integer. Result; addressed through rcx.
|
|
|
-; * a A single precision integer. Input; arrives in rdx and is copied to r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sqr_12 PROC
|
|
|
- push r12 ; r12-r14 are used as scratch below; saved here, restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- mov r8, rdx ; keep a in r8: mul overwrites rdx
|
|
|
- sub rsp, 96 ; 96-byte stack buffer for result words 0..11
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax ; rdx:rax = A[0] * A[0]
|
|
|
- xor r11, r11
|
|
|
- mov QWORD PTR [rsp], rax ; result word 0 -> stack buffer
|
|
|
- mov r10, rdx ; high half carries into word 1
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax ; same product added a second time: A[i]*A[j] with i != j counts twice
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+8], r10 ; result word 1 -> stack buffer
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r11, rax ; square term is added once only
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+16], r11 ; result word 2 -> stack buffer
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+24], r9 ; result word 3 -> stack buffer
|
|
|
- ; A[0] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+32], r10 ; result word 4 -> stack buffer
|
|
|
- ; A[0] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14 ; from here on r14:r13:r12 collects each column's cross products
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12 ; double the cross-product sum in one pass
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12 ; fold the column sum into the running accumulator
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+40], r11 ; result word 5 -> stack buffer
|
|
|
- ; A[0] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax
|
|
|
- add r12, r12 ; double the cross products first ...
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax ; ... then add the square term once
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+48], r9 ; result word 6 -> stack buffer
|
|
|
- ; A[0] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+56], r10 ; result word 7 -> stack buffer
|
|
|
- ; A[0] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+64], r11 ; result word 8 -> stack buffer
|
|
|
- ; A[0] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+72], r9 ; result word 9 -> stack buffer
|
|
|
- ; A[0] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+80], r10 ; result word 10 -> stack buffer
|
|
|
- ; A[0] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+88], r11 ; result word 11 -> stack buffer (last staged word)
|
|
|
- ; A[1] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[2] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+96], r9 ; result word 12: from here on words are stored straight to r
|
|
|
- ; A[2] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[3] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+104], r10 ; result word 13 -> r
|
|
|
- ; A[3] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[4] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+112], r11 ; result word 14 -> r
|
|
|
- ; A[4] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[5] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+120], r9 ; result word 15 -> r
|
|
|
- ; A[5] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[6] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+128], r10 ; result word 16 -> r
|
|
|
- ; A[6] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[7] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+136], r11 ; result word 17 -> r
|
|
|
- ; A[7] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax ; remaining columns add each cross product twice directly into the accumulator
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul rax
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+144], r9 ; result word 18 -> r
|
|
|
- ; A[8] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[9] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rcx+152], r10 ; result word 19 -> r
|
|
|
- ; A[9] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+160], r11 ; result word 20 -> r
|
|
|
- ; A[10] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+168], r9 ; result word 21 -> r
|
|
|
- ; A[11] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- mov QWORD PTR [rcx+176], r10 ; result word 22 -> r
|
|
|
- mov QWORD PTR [rcx+184], r11 ; result word 23 (top word) -> r
|
|
|
- mov rax, QWORD PTR [rsp] ; all reads of a are done: copy staged words 0..11 to r, four at a time
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r12, QWORD PTR [rsp+16]
|
|
|
- mov r13, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], r13
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r12, QWORD PTR [rsp+48]
|
|
|
- mov r13, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r12
|
|
|
- mov QWORD PTR [rcx+56], r13
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov rdx, QWORD PTR [rsp+72]
|
|
|
- mov r12, QWORD PTR [rsp+80]
|
|
|
- mov r13, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], rdx
|
|
|
- mov QWORD PTR [rcx+80], r12
|
|
|
- mov QWORD PTR [rcx+88], r13
|
|
|
- add rsp, 96 ; release the 96-byte stack buffer
|
|
|
- pop r14 ; restore saved registers in reverse push order
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_sqr_12 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sqr_avx2_12 PROC
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov r8, rcx
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 96
|
|
|
- cmp r9, r8
|
|
|
- mov rbp, rsp
|
|
|
- cmovne rbp, r8
|
|
|
- add r8, 96
|
|
|
- xor r12, r12
|
|
|
- ; Diagonal 1
|
|
|
- ; Zero into %r9
|
|
|
- ; A[1] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx r11, r10, QWORD PTR [r9+8]
|
|
|
- mov QWORD PTR [rbp+8], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[2] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [rbp+16], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[3] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [rbp+24], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[4] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [rbp+32], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[5] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+40]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [rbp+40], r10
|
|
|
- ; No load %r12 - %r8
|
|
|
- ; A[6] x A[0]
|
|
|
- mulx r14, rax, QWORD PTR [r9+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r14, r12
|
|
|
- mov QWORD PTR [rbp+48], r11
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[7] x A[0]
|
|
|
- mulx r15, rax, QWORD PTR [r9+56]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, r12
|
|
|
- ; No store %r12 - %r8
|
|
|
- ; No load %r14 - %r8
|
|
|
- ; A[8] x A[0]
|
|
|
- mulx rdi, rax, QWORD PTR [r9+64]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, r12
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[9] x A[0]
|
|
|
- mulx rsi, rax, QWORD PTR [r9+72]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, r12
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No load %rbx - %r8
|
|
|
- ; A[10] x A[0]
|
|
|
- mulx rbx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, r12
|
|
|
- ; No store %r15 - %r9
|
|
|
- ; Zero into %r9
|
|
|
- ; A[11] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+88]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, r12
|
|
|
- ; No store %rbx - %r8
|
|
|
- ; Carry
|
|
|
- adcx r11, r12
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- ; Diagonal 2
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- mov r10, QWORD PTR [rbp+32]
|
|
|
- ; A[2] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- mov r11, QWORD PTR [rbp+40]
|
|
|
- ; A[3] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+32], r10
|
|
|
- mov r10, QWORD PTR [rbp+48]
|
|
|
- ; A[4] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbp+40], r11
|
|
|
- ; No load %r12 - %r9
|
|
|
- ; A[5] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r10, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbp+48], r10
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[6] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; No store %r12 - %r9
|
|
|
- ; No load %r14 - %r9
|
|
|
- ; A[7] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[8] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No load %rbx - %r9
|
|
|
- ; A[9] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[10] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- ; Zero into %r9
|
|
|
- ; A[11] x A[1]
|
|
|
- mulx r11, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[11] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx r10, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- ; Diagonal 3
|
|
|
- mov r10, QWORD PTR [rbp+40]
|
|
|
- mov r11, QWORD PTR [rbp+48]
|
|
|
- ; A[3] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+40], r10
|
|
|
- ; No load %r12 - %r8
|
|
|
- ; A[4] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbp+48], r11
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[5] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; No store %r12 - %r8
|
|
|
- ; No load %r14 - %r8
|
|
|
- ; A[6] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[7] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No load %rbx - %r8
|
|
|
- ; A[8] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[9] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[10] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- ; A[10] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[10] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx r10, rax, QWORD PTR [r9+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[10] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx r11, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- ; Carry
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- ; Diagonal 4
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[4] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; No store %r12 - %r9
|
|
|
- ; No load %r14 - %r9
|
|
|
- ; A[5] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[6] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No load %rbx - %r9
|
|
|
- ; A[7] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[8] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[9] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- ; A[9] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[9] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[9] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[9] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx r11, rax, QWORD PTR [r9+72]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[9] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx r10, rax, QWORD PTR [r9+72]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- ; Diagonal 5
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[5] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No load %rbx - %r8
|
|
|
- ; A[6] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[7] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[8] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- ; A[8] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[8] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[8] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- ; A[10] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- ; A[10] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[10] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx r10, rax, QWORD PTR [r9+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+48], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[10] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx r11, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8+56], r10
|
|
|
- ; Carry
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+64], r11
|
|
|
- ; Diagonal 6
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[6] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[7] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- ; A[7] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[11] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[11] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[11] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- ; A[11] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[11] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- ; A[11] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[11] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx r11, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[11] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx r10, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+72], r11
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+80], r10
|
|
|
- mov QWORD PTR [r8+88], r13
|
|
|
- ; Double and Add in A[i] x A[i]
|
|
|
- mov r11, QWORD PTR [rbp+8]
|
|
|
- ; A[0] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- mov QWORD PTR [rbp], rax
|
|
|
- adox r11, r11
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+8], r11
|
|
|
- mov r10, QWORD PTR [rbp+16]
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- ; A[1] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+16], r10
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- mov r10, QWORD PTR [rbp+32]
|
|
|
- mov r11, QWORD PTR [rbp+40]
|
|
|
- ; A[2] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+32], r10
|
|
|
- mov QWORD PTR [rbp+40], r11
|
|
|
- mov r10, QWORD PTR [rbp+48]
|
|
|
- ; A[3] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r14, r14
|
|
|
- adcx r10, rax
|
|
|
- adcx r14, rcx
|
|
|
- mov QWORD PTR [rbp+48], r10
|
|
|
- ; A[4] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r15, r15
|
|
|
- adox rdi, rdi
|
|
|
- adcx r15, rax
|
|
|
- adcx rdi, rcx
|
|
|
- ; A[5] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox rsi, rsi
|
|
|
- adox rbx, rbx
|
|
|
- adcx rsi, rax
|
|
|
- adcx rbx, rcx
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[6] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[7] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[8] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[9] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- ; A[10] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- mov QWORD PTR [r8+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- ; A[11] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+80], r10
|
|
|
- mov QWORD PTR [r8+88], r11
|
|
|
- mov QWORD PTR [r8+-40], r14
|
|
|
- mov QWORD PTR [r8+-32], r15
|
|
|
- mov QWORD PTR [r8+-24], rdi
|
|
|
- mov QWORD PTR [r8+-16], rsi
|
|
|
- mov QWORD PTR [r8+-8], rbx
|
|
|
- sub r8, 96
|
|
|
- cmp r9, r8
|
|
|
- jne L_end_3072_sqr_avx2_12
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp]
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+32]
|
|
|
- vmovups OWORD PTR [r8+32], xmm0
|
|
|
- mov rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+48], rax
|
|
|
-L_end_3072_sqr_avx2_12:
|
|
|
- add rsp, 96
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- ret
|
|
|
-sp_3072_sqr_avx2_12 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
; /* Square a and put result in r. (r = a * a)
; *
; * Karatsuba: ah^2, al^2, (al - ah)^2
; *
; * a is read as 24 64-bit words: al = a[0..11], ah = a[12..23].
; * r is written as 48 64-bit words.
; *
; * Steps, as performed below:
; *   1. scratch[0..11] = |al - ah|  (subtract, then conditionally negate)
; *   2. scratch[0..23] = sp_3072_sqr_12(scratch)   (in place)
; *   3. r[24..47]      = sp_3072_sqr_12(ah)
; *   4. r[0..23]       = sp_3072_sqr_12(al)
; *   5. scratch       -= r[24..47]; scratch -= r[0..23]  (borrows counted)
; *   6. r[12..35]     -= scratch;  net carry is added into r[36..47]
; *
; * NOTE(review): sp_3072_sqr_12 is defined elsewhere in this file; it is
; * assumed to take rcx = result, rdx = operand and to tolerate rcx == rdx -
; * confirm against its definition.
; *
; * In:    rcx = r, rdx = a
; * Out:   rcx and rdx are reloaded with their entry values before returning
; * Stack: 208 bytes - [rsp, rsp+192) scratch, [rsp+192] = r, [rsp+200] = a
; *
; * r A single precision integer.
; * a A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_3072_sqr_24 PROC
        sub     rsp, 208
        mov     QWORD PTR [rsp+192], rcx        ; keep r across the calls
        mov     QWORD PTR [rsp+200], rdx        ; keep a across the calls

        ; ---- Step 1a: scratch[0..11] = al - ah, r9 = 0 or -1 (borrow mask)
        mov     r9, 0
        mov     r10, rsp                        ; r10 -> scratch
        lea     r11, QWORD PTR [rdx+96]         ; r11 -> ah
        mov     rax, QWORD PTR [rdx]
        sub     rax, QWORD PTR [r11]
        mov     QWORD PTR [r10], rax
        FOR ofs, <8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88>
        mov     rax, QWORD PTR [rdx+ofs]
        sbb     rax, QWORD PTR [r11+ofs]        ; mov keeps CF alive between limbs
        mov     QWORD PTR [r10+ofs], rax
        ENDM
        sbb     r9, 0                           ; r9 = -1 if al < ah, else 0

        ; ---- Step 1b: Cond Negate - scratch = (scratch XOR r9) - r9
        ; Word 0 is done with sub; r11 then holds the 0/1 carry that the
        ; "+1" of the two's-complement negate pushes into the next word.
        mov     r11, r9
        neg     r11
        mov     rax, QWORD PTR [r10]
        xor     rax, r9
        sub     rax, r9
        sbb     r11, 0
        mov     QWORD PTR [r10], rax
        FOR ofs, <8, 16, 24, 32, 40, 48, 56, 64, 72, 80>
        mov     rax, QWORD PTR [r10+ofs]
        xor     rax, r9
        add     rax, r11
        setc    r11b                            ; upper bits of r11 are already 0
        mov     QWORD PTR [r10+ofs], rax
        ENDM
        mov     rax, QWORD PTR [r10+88]         ; top word: carry out is dropped
        xor     rax, r9
        add     rax, r11
        mov     QWORD PTR [r10+88], rax

        ; ---- Step 2: scratch = scratch^2 (result and operand are both rsp)
        mov     rdx, r10
        mov     rcx, rsp
        call    sp_3072_sqr_12

        ; ---- Step 3: r[24..47] = ah^2
        mov     rdx, QWORD PTR [rsp+200]
        mov     rcx, QWORD PTR [rsp+192]
        add     rdx, 96
        add     rcx, 192
        call    sp_3072_sqr_12

        ; ---- Step 4: r[0..23] = al^2
        mov     rdx, QWORD PTR [rsp+200]
        mov     rcx, QWORD PTR [rsp+192]
        call    sp_3072_sqr_12
IFDEF _WIN64
        mov     rdx, QWORD PTR [rsp+200]
        mov     rcx, QWORD PTR [rsp+192]
ENDIF

        ; ---- Step 5a: scratch[0..23] -= r[24..47]
        ; Both pointers are biased so the 24 words span offsets -96..88.
        mov     rdx, QWORD PTR [rsp+192]
        lea     r10, QWORD PTR [rsp+96]         ; r10-96 -> scratch[0]
        add     rdx, 288                        ; rdx-96 -> r[24]
        mov     r9, 0                           ; r9 accumulates -(borrows)
        mov     rax, QWORD PTR [r10+-96]
        sub     rax, QWORD PTR [rdx+-96]
        mov     QWORD PTR [r10+-96], rax
        FOR ofs, <-88, -80, -72, -64, -56, -48, -40, -32, -24, -16, -8, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88>
        mov     rax, QWORD PTR [r10+ofs]
        sbb     rax, QWORD PTR [rdx+ofs]
        mov     QWORD PTR [r10+ofs], rax
        ENDM
        sbb     r9, 0

        ; ---- Step 5b: scratch[0..23] -= r[0..23]
        sub     rdx, 192                        ; rdx-96 -> r[0]
        mov     rax, QWORD PTR [r10+-96]
        sub     rax, QWORD PTR [rdx+-96]
        mov     QWORD PTR [r10+-96], rax
        FOR ofs, <-88, -80, -72, -64, -56, -48, -40, -32, -24, -16, -8, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88>
        mov     rax, QWORD PTR [r10+ofs]
        sbb     rax, QWORD PTR [rdx+ofs]
        mov     QWORD PTR [r10+ofs], rax
        ENDM
        sbb     r9, 0

        ; ---- Step 6a: r[12..35] -= scratch[0..23]
        mov     rcx, QWORD PTR [rsp+192]
        neg     r9                              ; r9 = number of borrows so far
        add     rcx, 192                        ; rcx-96 -> r[12]
        mov     rax, QWORD PTR [rcx+-96]
        sub     rax, QWORD PTR [r10+-96]
        mov     QWORD PTR [rcx+-96], rax
        FOR ofs, <-88, -80, -72, -64, -56, -48, -40, -32, -24, -16, -8, 0, 8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88>
        mov     rax, QWORD PTR [rcx+ofs]
        sbb     rax, QWORD PTR [r10+ofs]
        mov     QWORD PTR [rcx+ofs], rax
        ENDM
        sbb     r9, 0                           ; r9 = net word to add at r[36]

        ; ---- Step 6b: Add in word - r[36..47] += r9, rippling the carry
        mov     rcx, QWORD PTR [rsp+192]
        add     rcx, 288                        ; rcx -> r[36]
        mov     rax, QWORD PTR [rcx]
        add     rax, r9
        mov     QWORD PTR [rcx], rax
        FOR ofs, <8, 16, 24, 32, 40, 48, 56, 64, 72, 80, 88>
        mov     rax, QWORD PTR [rcx+ofs]
        adc     rax, 0
        mov     QWORD PTR [rcx+ofs], rax
        ENDM

        ; Hand the original argument registers back and release the frame.
        mov     rdx, QWORD PTR [rsp+200]
        mov     rcx, QWORD PTR [rsp+192]
        add     rsp, 208
        ret
sp_3072_sqr_24 ENDP
_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * Karatsuba: ah^2, al^2, (al - ah)^2
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sqr_avx2_24 PROC
|
|
|
- sub rsp, 208
|
|
|
- mov QWORD PTR [rsp+192], rcx
|
|
|
- mov QWORD PTR [rsp+200], rdx
|
|
|
- mov r9, 0
|
|
|
- mov r10, rsp
|
|
|
- lea r11, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- sub rax, QWORD PTR [r11]
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- sbb r8, QWORD PTR [r11+8]
|
|
|
- mov rax, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- sbb rax, QWORD PTR [r11+16]
|
|
|
- mov r8, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- sbb r8, QWORD PTR [r11+24]
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- sbb rax, QWORD PTR [r11+32]
|
|
|
- mov r8, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- sbb r8, QWORD PTR [r11+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- sbb rax, QWORD PTR [r11+48]
|
|
|
- mov r8, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- sbb r8, QWORD PTR [r11+56]
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- sbb rax, QWORD PTR [r11+64]
|
|
|
- mov r8, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- sbb r8, QWORD PTR [r11+72]
|
|
|
- mov rax, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- sbb rax, QWORD PTR [r11+80]
|
|
|
- mov r8, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- sbb r8, QWORD PTR [r11+88]
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- sbb r9, 0
|
|
|
- ; Cond Negate
|
|
|
- mov rax, QWORD PTR [r10]
|
|
|
- mov r11, r9
|
|
|
- xor rax, r9
|
|
|
- neg r11
|
|
|
- sub rax, r9
|
|
|
- mov r8, QWORD PTR [r10+8]
|
|
|
- sbb r11, 0
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+16]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+24]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+32]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+40]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+56]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+64]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+72]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+80]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+88]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- mov rdx, r10
|
|
|
- mov rcx, rsp
|
|
|
- call sp_3072_sqr_avx2_12
|
|
|
- mov rdx, QWORD PTR [rsp+200]
|
|
|
- mov rcx, QWORD PTR [rsp+192]
|
|
|
- add rdx, 96
|
|
|
- add rcx, 192
|
|
|
- call sp_3072_sqr_avx2_12
|
|
|
- mov rdx, QWORD PTR [rsp+200]
|
|
|
- mov rcx, QWORD PTR [rsp+192]
|
|
|
- call sp_3072_sqr_avx2_12
|
|
|
-IFDEF _WIN64
|
|
|
- mov rdx, QWORD PTR [rsp+200]
|
|
|
- mov rcx, QWORD PTR [rsp+192]
|
|
|
-ENDIF
|
|
|
- mov rdx, QWORD PTR [rsp+192]
|
|
|
- lea r10, QWORD PTR [rsp+96]
|
|
|
- add rdx, 288
|
|
|
- mov r9, 0
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- sub r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r9, 0
|
|
|
- sub rdx, 192
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- sub r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+192]
|
|
|
- neg r9
|
|
|
- add rcx, 192
|
|
|
- mov r8, QWORD PTR [rcx+-96]
|
|
|
- sub r8, QWORD PTR [r10+-96]
|
|
|
- mov rax, QWORD PTR [rcx+-88]
|
|
|
- mov QWORD PTR [rcx+-96], r8
|
|
|
- sbb rax, QWORD PTR [r10+-88]
|
|
|
- mov r8, QWORD PTR [rcx+-80]
|
|
|
- mov QWORD PTR [rcx+-88], rax
|
|
|
- sbb r8, QWORD PTR [r10+-80]
|
|
|
- mov rax, QWORD PTR [rcx+-72]
|
|
|
- mov QWORD PTR [rcx+-80], r8
|
|
|
- sbb rax, QWORD PTR [r10+-72]
|
|
|
- mov r8, QWORD PTR [rcx+-64]
|
|
|
- mov QWORD PTR [rcx+-72], rax
|
|
|
- sbb r8, QWORD PTR [r10+-64]
|
|
|
- mov rax, QWORD PTR [rcx+-56]
|
|
|
- mov QWORD PTR [rcx+-64], r8
|
|
|
- sbb rax, QWORD PTR [r10+-56]
|
|
|
- mov r8, QWORD PTR [rcx+-48]
|
|
|
- mov QWORD PTR [rcx+-56], rax
|
|
|
- sbb r8, QWORD PTR [r10+-48]
|
|
|
- mov rax, QWORD PTR [rcx+-40]
|
|
|
- mov QWORD PTR [rcx+-48], r8
|
|
|
- sbb rax, QWORD PTR [r10+-40]
|
|
|
- mov r8, QWORD PTR [rcx+-32]
|
|
|
- mov QWORD PTR [rcx+-40], rax
|
|
|
- sbb r8, QWORD PTR [r10+-32]
|
|
|
- mov rax, QWORD PTR [rcx+-24]
|
|
|
- mov QWORD PTR [rcx+-32], r8
|
|
|
- sbb rax, QWORD PTR [r10+-24]
|
|
|
- mov r8, QWORD PTR [rcx+-16]
|
|
|
- mov QWORD PTR [rcx+-24], rax
|
|
|
- sbb r8, QWORD PTR [r10+-16]
|
|
|
- mov rax, QWORD PTR [rcx+-8]
|
|
|
- mov QWORD PTR [rcx+-16], r8
|
|
|
- sbb rax, QWORD PTR [r10+-8]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- mov QWORD PTR [rcx+-8], rax
|
|
|
- sbb r8, QWORD PTR [r10]
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- sbb rax, QWORD PTR [r10+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- sbb r8, QWORD PTR [r10+16]
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb rax, QWORD PTR [r10+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- sbb r8, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb rax, QWORD PTR [r10+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- sbb r8, QWORD PTR [r10+48]
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb rax, QWORD PTR [r10+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- sbb r8, QWORD PTR [r10+64]
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb rax, QWORD PTR [r10+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- sbb r8, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+192]
|
|
|
- add rcx, 288
|
|
|
- ; Add in word
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- add r8, r9
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- mov rdx, QWORD PTR [rsp+200]
|
|
|
- mov rcx, QWORD PTR [rsp+192]
|
|
|
- add rsp, 208
|
|
|
- ret
|
|
|
-sp_3072_sqr_avx2_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * Karatsuba: ah^2, al^2, (al - ah)^2 - three calls to sp_3072_sqr_24 on 24-word halves, recombined below.
|
|
|
-; *
|
|
|
-; * r  A single precision integer. Passed in rcx; addressed here as 96 64-bit words (r[0..95]).
|
|
|
-; * a  A single precision integer. Passed in rdx; 48 64-bit words are read (al = a[0..23], ah = a[24..47]).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sqr_48 PROC
|
|
|
-        sub     rsp, 400  ; 384 bytes of scratch at [rsp] plus two 8-byte slots for r and a
|
|
|
-        mov     QWORD PTR [rsp+384], rcx  ; save r
|
|
|
-        mov     QWORD PTR [rsp+392], rdx  ; save a
|
|
|
-        mov     r9, 0  ; r9 receives the final borrow of al - ah
|
|
|
-        mov     r10, rsp  ; r10 -> scratch
|
|
|
-        lea     r11, QWORD PTR [rdx+192]  ; r11 -> ah (a[24..47])
|
|
|
-        mov     rax, QWORD PTR [rdx]
|
|
|
-        sub     rax, QWORD PTR [r11]  ; scratch[i] = a[i] - a[24+i]; the borrow is chained by sbb (the mov in between leaves flags untouched)
|
|
|
-        mov     r8, QWORD PTR [rdx+8]
|
|
|
-        mov     QWORD PTR [r10], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+8]
|
|
|
-        mov     rax, QWORD PTR [rdx+16]
|
|
|
-        mov     QWORD PTR [r10+8], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+16]
|
|
|
-        mov     r8, QWORD PTR [rdx+24]
|
|
|
-        mov     QWORD PTR [r10+16], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+24]
|
|
|
-        mov     rax, QWORD PTR [rdx+32]
|
|
|
-        mov     QWORD PTR [r10+24], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+32]
|
|
|
-        mov     r8, QWORD PTR [rdx+40]
|
|
|
-        mov     QWORD PTR [r10+32], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+40]
|
|
|
-        mov     rax, QWORD PTR [rdx+48]
|
|
|
-        mov     QWORD PTR [r10+40], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+48]
|
|
|
-        mov     r8, QWORD PTR [rdx+56]
|
|
|
-        mov     QWORD PTR [r10+48], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+56]
|
|
|
-        mov     rax, QWORD PTR [rdx+64]
|
|
|
-        mov     QWORD PTR [r10+56], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+64]
|
|
|
-        mov     r8, QWORD PTR [rdx+72]
|
|
|
-        mov     QWORD PTR [r10+64], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+72]
|
|
|
-        mov     rax, QWORD PTR [rdx+80]
|
|
|
-        mov     QWORD PTR [r10+72], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+80]
|
|
|
-        mov     r8, QWORD PTR [rdx+88]
|
|
|
-        mov     QWORD PTR [r10+80], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+88]
|
|
|
-        mov     rax, QWORD PTR [rdx+96]
|
|
|
-        mov     QWORD PTR [r10+88], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+96]
|
|
|
-        mov     r8, QWORD PTR [rdx+104]
|
|
|
-        mov     QWORD PTR [r10+96], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+104]
|
|
|
-        mov     rax, QWORD PTR [rdx+112]
|
|
|
-        mov     QWORD PTR [r10+104], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+112]
|
|
|
-        mov     r8, QWORD PTR [rdx+120]
|
|
|
-        mov     QWORD PTR [r10+112], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+120]
|
|
|
-        mov     rax, QWORD PTR [rdx+128]
|
|
|
-        mov     QWORD PTR [r10+120], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+128]
|
|
|
-        mov     r8, QWORD PTR [rdx+136]
|
|
|
-        mov     QWORD PTR [r10+128], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+136]
|
|
|
-        mov     rax, QWORD PTR [rdx+144]
|
|
|
-        mov     QWORD PTR [r10+136], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+144]
|
|
|
-        mov     r8, QWORD PTR [rdx+152]
|
|
|
-        mov     QWORD PTR [r10+144], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+152]
|
|
|
-        mov     rax, QWORD PTR [rdx+160]
|
|
|
-        mov     QWORD PTR [r10+152], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+160]
|
|
|
-        mov     r8, QWORD PTR [rdx+168]
|
|
|
-        mov     QWORD PTR [r10+160], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+168]
|
|
|
-        mov     rax, QWORD PTR [rdx+176]
|
|
|
-        mov     QWORD PTR [r10+168], r8
|
|
|
-        sbb     rax, QWORD PTR [r11+176]
|
|
|
-        mov     r8, QWORD PTR [rdx+184]
|
|
|
-        mov     QWORD PTR [r10+176], rax
|
|
|
-        sbb     r8, QWORD PTR [r11+184]
|
|
|
-        mov     QWORD PTR [r10+184], r8
|
|
|
-        sbb     r9, 0  ; r9 = 0 when al >= ah, all-ones when the subtraction borrowed
|
|
|
-        ; Cond Negate: when r9 is all-ones the 24 scratch words are replaced by their two's complement (xor with r9, then add 1 with carry); when r9 is 0 they are left unchanged
|
|
|
-        mov     rax, QWORD PTR [r10]
|
|
|
-        mov     r11, r9
|
|
|
-        xor     rax, r9
|
|
|
-        neg     r11  ; r11 = 1 when negating, otherwise 0
|
|
|
-        sub     rax, r9  ; adds 1 to word 0 when r9 is all-ones
|
|
|
-        mov     r8, QWORD PTR [r10+8]
|
|
|
-        sbb     r11, 0  ; r11 = carry out of word 0 (0 or 1)
|
|
|
-        mov     QWORD PTR [r10], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+16]
|
|
|
-        setc    r11b  ; carry out of the previous word becomes the increment for the next one
|
|
|
-        mov     QWORD PTR [r10+8], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+24]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+16], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+32]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+24], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+40]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+32], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+48]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+40], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+56]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+48], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+64]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+56], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+72]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+64], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+80]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+72], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+88]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+80], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+96]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+88], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+104]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+96], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+112]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+104], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+120]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+112], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+128]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+120], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+136]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+128], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+144]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+136], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+152]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+144], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+160]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+152], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+168]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+160], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     rax, QWORD PTR [r10+176]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+168], r8
|
|
|
-        xor     rax, r9
|
|
|
-        add     rax, r11
|
|
|
-        mov     r8, QWORD PTR [r10+184]
|
|
|
-        setc    r11b
|
|
|
-        mov     QWORD PTR [r10+176], rax
|
|
|
-        xor     r8, r9
|
|
|
-        add     r8, r11
|
|
|
-        mov     QWORD PTR [r10+184], r8
|
|
|
-        mov     rdx, r10  ; source = scratch (the 24-word |al - ah|)
|
|
|
-        mov     rcx, rsp  ; destination = scratch as well
|
|
|
-        call    sp_3072_sqr_24  ; (al - ah)^2 into scratch - presumably 48 words; callee is defined elsewhere in the file
|
|
|
-        mov     rdx, QWORD PTR [rsp+392]
|
|
|
-        mov     rcx, QWORD PTR [rsp+384]
|
|
|
-        add     rdx, 192  ; rdx -> ah
|
|
|
-        add     rcx, 384  ; rcx -> r[48]
|
|
|
-        call    sp_3072_sqr_24  ; ah^2 into the upper half of r
|
|
|
-        mov     rdx, QWORD PTR [rsp+392]
|
|
|
-        mov     rcx, QWORD PTR [rsp+384]
|
|
|
-        call    sp_3072_sqr_24  ; al^2 into the lower half of r
|
|
|
-IFDEF _WIN64
|
|
|
-        mov     rdx, QWORD PTR [rsp+392]  ; reload a after the call (rdx is overwritten again just below)
|
|
|
-        mov     rcx, QWORD PTR [rsp+384]  ; reload r after the call
|
|
|
-ENDIF
|
|
|
-        mov     rdx, QWORD PTR [rsp+384]  ; rdx = r
|
|
|
-        lea     r10, QWORD PTR [rsp+192]  ; r10 -> middle of scratch, so displacements -192..+184 cover all 48 words
|
|
|
-        add     rdx, 576  ; rdx -> r[72]; displacements -192..+184 cover r[48..95]
|
|
|
-        mov     r9, 0  ; r9 accumulates the borrows of the two subtractions below
|
|
|
-        mov     r8, QWORD PTR [r10+-192]
|
|
|
-        sub     r8, QWORD PTR [rdx+-192]  ; scratch -= r[48..95], 48-word borrow chain
|
|
|
-        mov     rax, QWORD PTR [r10+-184]
|
|
|
-        mov     QWORD PTR [r10+-192], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-184]
|
|
|
-        mov     r8, QWORD PTR [r10+-176]
|
|
|
-        mov     QWORD PTR [r10+-184], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-176]
|
|
|
-        mov     rax, QWORD PTR [r10+-168]
|
|
|
-        mov     QWORD PTR [r10+-176], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-168]
|
|
|
-        mov     r8, QWORD PTR [r10+-160]
|
|
|
-        mov     QWORD PTR [r10+-168], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-160]
|
|
|
-        mov     rax, QWORD PTR [r10+-152]
|
|
|
-        mov     QWORD PTR [r10+-160], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-152]
|
|
|
-        mov     r8, QWORD PTR [r10+-144]
|
|
|
-        mov     QWORD PTR [r10+-152], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-144]
|
|
|
-        mov     rax, QWORD PTR [r10+-136]
|
|
|
-        mov     QWORD PTR [r10+-144], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-136]
|
|
|
-        mov     r8, QWORD PTR [r10+-128]
|
|
|
-        mov     QWORD PTR [r10+-136], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-128]
|
|
|
-        mov     rax, QWORD PTR [r10+-120]
|
|
|
-        mov     QWORD PTR [r10+-128], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-120]
|
|
|
-        mov     r8, QWORD PTR [r10+-112]
|
|
|
-        mov     QWORD PTR [r10+-120], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-112]
|
|
|
-        mov     rax, QWORD PTR [r10+-104]
|
|
|
-        mov     QWORD PTR [r10+-112], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-104]
|
|
|
-        mov     r8, QWORD PTR [r10+-96]
|
|
|
-        mov     QWORD PTR [r10+-104], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-96]
|
|
|
-        mov     rax, QWORD PTR [r10+-88]
|
|
|
-        mov     QWORD PTR [r10+-96], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-88]
|
|
|
-        mov     r8, QWORD PTR [r10+-80]
|
|
|
-        mov     QWORD PTR [r10+-88], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-80]
|
|
|
-        mov     rax, QWORD PTR [r10+-72]
|
|
|
-        mov     QWORD PTR [r10+-80], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-72]
|
|
|
-        mov     r8, QWORD PTR [r10+-64]
|
|
|
-        mov     QWORD PTR [r10+-72], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-64]
|
|
|
-        mov     rax, QWORD PTR [r10+-56]
|
|
|
-        mov     QWORD PTR [r10+-64], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-56]
|
|
|
-        mov     r8, QWORD PTR [r10+-48]
|
|
|
-        mov     QWORD PTR [r10+-56], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-48]
|
|
|
-        mov     rax, QWORD PTR [r10+-40]
|
|
|
-        mov     QWORD PTR [r10+-48], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-40]
|
|
|
-        mov     r8, QWORD PTR [r10+-32]
|
|
|
-        mov     QWORD PTR [r10+-40], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-32]
|
|
|
-        mov     rax, QWORD PTR [r10+-24]
|
|
|
-        mov     QWORD PTR [r10+-32], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-24]
|
|
|
-        mov     r8, QWORD PTR [r10+-16]
|
|
|
-        mov     QWORD PTR [r10+-24], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-16]
|
|
|
-        mov     rax, QWORD PTR [r10+-8]
|
|
|
-        mov     QWORD PTR [r10+-16], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-8]
|
|
|
-        mov     r8, QWORD PTR [r10]
|
|
|
-        mov     QWORD PTR [r10+-8], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx]
|
|
|
-        mov     rax, QWORD PTR [r10+8]
|
|
|
-        mov     QWORD PTR [r10], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+8]
|
|
|
-        mov     r8, QWORD PTR [r10+16]
|
|
|
-        mov     QWORD PTR [r10+8], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+16]
|
|
|
-        mov     rax, QWORD PTR [r10+24]
|
|
|
-        mov     QWORD PTR [r10+16], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+24]
|
|
|
-        mov     r8, QWORD PTR [r10+32]
|
|
|
-        mov     QWORD PTR [r10+24], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+32]
|
|
|
-        mov     rax, QWORD PTR [r10+40]
|
|
|
-        mov     QWORD PTR [r10+32], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+40]
|
|
|
-        mov     r8, QWORD PTR [r10+48]
|
|
|
-        mov     QWORD PTR [r10+40], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+48]
|
|
|
-        mov     rax, QWORD PTR [r10+56]
|
|
|
-        mov     QWORD PTR [r10+48], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+56]
|
|
|
-        mov     r8, QWORD PTR [r10+64]
|
|
|
-        mov     QWORD PTR [r10+56], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+64]
|
|
|
-        mov     rax, QWORD PTR [r10+72]
|
|
|
-        mov     QWORD PTR [r10+64], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+72]
|
|
|
-        mov     r8, QWORD PTR [r10+80]
|
|
|
-        mov     QWORD PTR [r10+72], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+80]
|
|
|
-        mov     rax, QWORD PTR [r10+88]
|
|
|
-        mov     QWORD PTR [r10+80], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+88]
|
|
|
-        mov     r8, QWORD PTR [r10+96]
|
|
|
-        mov     QWORD PTR [r10+88], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+96]
|
|
|
-        mov     rax, QWORD PTR [r10+104]
|
|
|
-        mov     QWORD PTR [r10+96], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+104]
|
|
|
-        mov     r8, QWORD PTR [r10+112]
|
|
|
-        mov     QWORD PTR [r10+104], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+112]
|
|
|
-        mov     rax, QWORD PTR [r10+120]
|
|
|
-        mov     QWORD PTR [r10+112], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+120]
|
|
|
-        mov     r8, QWORD PTR [r10+128]
|
|
|
-        mov     QWORD PTR [r10+120], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+128]
|
|
|
-        mov     rax, QWORD PTR [r10+136]
|
|
|
-        mov     QWORD PTR [r10+128], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+136]
|
|
|
-        mov     r8, QWORD PTR [r10+144]
|
|
|
-        mov     QWORD PTR [r10+136], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+144]
|
|
|
-        mov     rax, QWORD PTR [r10+152]
|
|
|
-        mov     QWORD PTR [r10+144], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+152]
|
|
|
-        mov     r8, QWORD PTR [r10+160]
|
|
|
-        mov     QWORD PTR [r10+152], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+160]
|
|
|
-        mov     rax, QWORD PTR [r10+168]
|
|
|
-        mov     QWORD PTR [r10+160], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+168]
|
|
|
-        mov     r8, QWORD PTR [r10+176]
|
|
|
-        mov     QWORD PTR [r10+168], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+176]
|
|
|
-        mov     rax, QWORD PTR [r10+184]
|
|
|
-        mov     QWORD PTR [r10+176], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+184]
|
|
|
-        mov     QWORD PTR [r10+184], rax
|
|
|
-        sbb     r9, 0  ; r9 -= final borrow of the first subtraction
|
|
|
-        sub     rdx, 384  ; rdx -> r[24]; displacements -192..+184 now cover r[0..47]
|
|
|
-        mov     r8, QWORD PTR [r10+-192]
|
|
|
-        sub     r8, QWORD PTR [rdx+-192]  ; scratch -= r[0..47], 48-word borrow chain
|
|
|
-        mov     rax, QWORD PTR [r10+-184]
|
|
|
-        mov     QWORD PTR [r10+-192], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-184]
|
|
|
-        mov     r8, QWORD PTR [r10+-176]
|
|
|
-        mov     QWORD PTR [r10+-184], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-176]
|
|
|
-        mov     rax, QWORD PTR [r10+-168]
|
|
|
-        mov     QWORD PTR [r10+-176], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-168]
|
|
|
-        mov     r8, QWORD PTR [r10+-160]
|
|
|
-        mov     QWORD PTR [r10+-168], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-160]
|
|
|
-        mov     rax, QWORD PTR [r10+-152]
|
|
|
-        mov     QWORD PTR [r10+-160], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-152]
|
|
|
-        mov     r8, QWORD PTR [r10+-144]
|
|
|
-        mov     QWORD PTR [r10+-152], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-144]
|
|
|
-        mov     rax, QWORD PTR [r10+-136]
|
|
|
-        mov     QWORD PTR [r10+-144], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-136]
|
|
|
-        mov     r8, QWORD PTR [r10+-128]
|
|
|
-        mov     QWORD PTR [r10+-136], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-128]
|
|
|
-        mov     rax, QWORD PTR [r10+-120]
|
|
|
-        mov     QWORD PTR [r10+-128], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-120]
|
|
|
-        mov     r8, QWORD PTR [r10+-112]
|
|
|
-        mov     QWORD PTR [r10+-120], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-112]
|
|
|
-        mov     rax, QWORD PTR [r10+-104]
|
|
|
-        mov     QWORD PTR [r10+-112], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-104]
|
|
|
-        mov     r8, QWORD PTR [r10+-96]
|
|
|
-        mov     QWORD PTR [r10+-104], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-96]
|
|
|
-        mov     rax, QWORD PTR [r10+-88]
|
|
|
-        mov     QWORD PTR [r10+-96], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-88]
|
|
|
-        mov     r8, QWORD PTR [r10+-80]
|
|
|
-        mov     QWORD PTR [r10+-88], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-80]
|
|
|
-        mov     rax, QWORD PTR [r10+-72]
|
|
|
-        mov     QWORD PTR [r10+-80], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-72]
|
|
|
-        mov     r8, QWORD PTR [r10+-64]
|
|
|
-        mov     QWORD PTR [r10+-72], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-64]
|
|
|
-        mov     rax, QWORD PTR [r10+-56]
|
|
|
-        mov     QWORD PTR [r10+-64], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-56]
|
|
|
-        mov     r8, QWORD PTR [r10+-48]
|
|
|
-        mov     QWORD PTR [r10+-56], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-48]
|
|
|
-        mov     rax, QWORD PTR [r10+-40]
|
|
|
-        mov     QWORD PTR [r10+-48], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-40]
|
|
|
-        mov     r8, QWORD PTR [r10+-32]
|
|
|
-        mov     QWORD PTR [r10+-40], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-32]
|
|
|
-        mov     rax, QWORD PTR [r10+-24]
|
|
|
-        mov     QWORD PTR [r10+-32], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-24]
|
|
|
-        mov     r8, QWORD PTR [r10+-16]
|
|
|
-        mov     QWORD PTR [r10+-24], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+-16]
|
|
|
-        mov     rax, QWORD PTR [r10+-8]
|
|
|
-        mov     QWORD PTR [r10+-16], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+-8]
|
|
|
-        mov     r8, QWORD PTR [r10]
|
|
|
-        mov     QWORD PTR [r10+-8], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx]
|
|
|
-        mov     rax, QWORD PTR [r10+8]
|
|
|
-        mov     QWORD PTR [r10], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+8]
|
|
|
-        mov     r8, QWORD PTR [r10+16]
|
|
|
-        mov     QWORD PTR [r10+8], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+16]
|
|
|
-        mov     rax, QWORD PTR [r10+24]
|
|
|
-        mov     QWORD PTR [r10+16], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+24]
|
|
|
-        mov     r8, QWORD PTR [r10+32]
|
|
|
-        mov     QWORD PTR [r10+24], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+32]
|
|
|
-        mov     rax, QWORD PTR [r10+40]
|
|
|
-        mov     QWORD PTR [r10+32], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+40]
|
|
|
-        mov     r8, QWORD PTR [r10+48]
|
|
|
-        mov     QWORD PTR [r10+40], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+48]
|
|
|
-        mov     rax, QWORD PTR [r10+56]
|
|
|
-        mov     QWORD PTR [r10+48], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+56]
|
|
|
-        mov     r8, QWORD PTR [r10+64]
|
|
|
-        mov     QWORD PTR [r10+56], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+64]
|
|
|
-        mov     rax, QWORD PTR [r10+72]
|
|
|
-        mov     QWORD PTR [r10+64], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+72]
|
|
|
-        mov     r8, QWORD PTR [r10+80]
|
|
|
-        mov     QWORD PTR [r10+72], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+80]
|
|
|
-        mov     rax, QWORD PTR [r10+88]
|
|
|
-        mov     QWORD PTR [r10+80], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+88]
|
|
|
-        mov     r8, QWORD PTR [r10+96]
|
|
|
-        mov     QWORD PTR [r10+88], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+96]
|
|
|
-        mov     rax, QWORD PTR [r10+104]
|
|
|
-        mov     QWORD PTR [r10+96], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+104]
|
|
|
-        mov     r8, QWORD PTR [r10+112]
|
|
|
-        mov     QWORD PTR [r10+104], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+112]
|
|
|
-        mov     rax, QWORD PTR [r10+120]
|
|
|
-        mov     QWORD PTR [r10+112], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+120]
|
|
|
-        mov     r8, QWORD PTR [r10+128]
|
|
|
-        mov     QWORD PTR [r10+120], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+128]
|
|
|
-        mov     rax, QWORD PTR [r10+136]
|
|
|
-        mov     QWORD PTR [r10+128], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+136]
|
|
|
-        mov     r8, QWORD PTR [r10+144]
|
|
|
-        mov     QWORD PTR [r10+136], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+144]
|
|
|
-        mov     rax, QWORD PTR [r10+152]
|
|
|
-        mov     QWORD PTR [r10+144], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+152]
|
|
|
-        mov     r8, QWORD PTR [r10+160]
|
|
|
-        mov     QWORD PTR [r10+152], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+160]
|
|
|
-        mov     rax, QWORD PTR [r10+168]
|
|
|
-        mov     QWORD PTR [r10+160], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+168]
|
|
|
-        mov     r8, QWORD PTR [r10+176]
|
|
|
-        mov     QWORD PTR [r10+168], rax
|
|
|
-        sbb     r8, QWORD PTR [rdx+176]
|
|
|
-        mov     rax, QWORD PTR [r10+184]
|
|
|
-        mov     QWORD PTR [r10+176], r8
|
|
|
-        sbb     rax, QWORD PTR [rdx+184]
|
|
|
-        mov     QWORD PTR [r10+184], rax
|
|
|
-        sbb     r9, 0  ; r9 = -(number of borrows from the two subtractions)
|
|
|
-        mov     rcx, QWORD PTR [rsp+384]
|
|
|
-        neg     r9  ; r9 = number of borrows (0, 1 or 2)
|
|
|
-        add     rcx, 384  ; rcx -> r[48]; displacements -192..+184 cover r[24..71]
|
|
|
-        mov     r8, QWORD PTR [rcx+-192]
|
|
|
-        sub     r8, QWORD PTR [r10+-192]  ; r[24..71] -= scratch, 48-word borrow chain
|
|
|
-        mov     rax, QWORD PTR [rcx+-184]
|
|
|
-        mov     QWORD PTR [rcx+-192], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-184]
|
|
|
-        mov     r8, QWORD PTR [rcx+-176]
|
|
|
-        mov     QWORD PTR [rcx+-184], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-176]
|
|
|
-        mov     rax, QWORD PTR [rcx+-168]
|
|
|
-        mov     QWORD PTR [rcx+-176], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-168]
|
|
|
-        mov     r8, QWORD PTR [rcx+-160]
|
|
|
-        mov     QWORD PTR [rcx+-168], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-160]
|
|
|
-        mov     rax, QWORD PTR [rcx+-152]
|
|
|
-        mov     QWORD PTR [rcx+-160], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-152]
|
|
|
-        mov     r8, QWORD PTR [rcx+-144]
|
|
|
-        mov     QWORD PTR [rcx+-152], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-144]
|
|
|
-        mov     rax, QWORD PTR [rcx+-136]
|
|
|
-        mov     QWORD PTR [rcx+-144], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-136]
|
|
|
-        mov     r8, QWORD PTR [rcx+-128]
|
|
|
-        mov     QWORD PTR [rcx+-136], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-128]
|
|
|
-        mov     rax, QWORD PTR [rcx+-120]
|
|
|
-        mov     QWORD PTR [rcx+-128], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-120]
|
|
|
-        mov     r8, QWORD PTR [rcx+-112]
|
|
|
-        mov     QWORD PTR [rcx+-120], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-112]
|
|
|
-        mov     rax, QWORD PTR [rcx+-104]
|
|
|
-        mov     QWORD PTR [rcx+-112], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-104]
|
|
|
-        mov     r8, QWORD PTR [rcx+-96]
|
|
|
-        mov     QWORD PTR [rcx+-104], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-96]
|
|
|
-        mov     rax, QWORD PTR [rcx+-88]
|
|
|
-        mov     QWORD PTR [rcx+-96], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-88]
|
|
|
-        mov     r8, QWORD PTR [rcx+-80]
|
|
|
-        mov     QWORD PTR [rcx+-88], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-80]
|
|
|
-        mov     rax, QWORD PTR [rcx+-72]
|
|
|
-        mov     QWORD PTR [rcx+-80], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-72]
|
|
|
-        mov     r8, QWORD PTR [rcx+-64]
|
|
|
-        mov     QWORD PTR [rcx+-72], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-64]
|
|
|
-        mov     rax, QWORD PTR [rcx+-56]
|
|
|
-        mov     QWORD PTR [rcx+-64], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-56]
|
|
|
-        mov     r8, QWORD PTR [rcx+-48]
|
|
|
-        mov     QWORD PTR [rcx+-56], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-48]
|
|
|
-        mov     rax, QWORD PTR [rcx+-40]
|
|
|
-        mov     QWORD PTR [rcx+-48], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-40]
|
|
|
-        mov     r8, QWORD PTR [rcx+-32]
|
|
|
-        mov     QWORD PTR [rcx+-40], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-32]
|
|
|
-        mov     rax, QWORD PTR [rcx+-24]
|
|
|
-        mov     QWORD PTR [rcx+-32], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-24]
|
|
|
-        mov     r8, QWORD PTR [rcx+-16]
|
|
|
-        mov     QWORD PTR [rcx+-24], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+-16]
|
|
|
-        mov     rax, QWORD PTR [rcx+-8]
|
|
|
-        mov     QWORD PTR [rcx+-16], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+-8]
|
|
|
-        mov     r8, QWORD PTR [rcx]
|
|
|
-        mov     QWORD PTR [rcx+-8], rax
|
|
|
-        sbb     r8, QWORD PTR [r10]
|
|
|
-        mov     rax, QWORD PTR [rcx+8]
|
|
|
-        mov     QWORD PTR [rcx], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+8]
|
|
|
-        mov     r8, QWORD PTR [rcx+16]
|
|
|
-        mov     QWORD PTR [rcx+8], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+16]
|
|
|
-        mov     rax, QWORD PTR [rcx+24]
|
|
|
-        mov     QWORD PTR [rcx+16], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+24]
|
|
|
-        mov     r8, QWORD PTR [rcx+32]
|
|
|
-        mov     QWORD PTR [rcx+24], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+32]
|
|
|
-        mov     rax, QWORD PTR [rcx+40]
|
|
|
-        mov     QWORD PTR [rcx+32], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+40]
|
|
|
-        mov     r8, QWORD PTR [rcx+48]
|
|
|
-        mov     QWORD PTR [rcx+40], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+48]
|
|
|
-        mov     rax, QWORD PTR [rcx+56]
|
|
|
-        mov     QWORD PTR [rcx+48], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+56]
|
|
|
-        mov     r8, QWORD PTR [rcx+64]
|
|
|
-        mov     QWORD PTR [rcx+56], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+64]
|
|
|
-        mov     rax, QWORD PTR [rcx+72]
|
|
|
-        mov     QWORD PTR [rcx+64], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+72]
|
|
|
-        mov     r8, QWORD PTR [rcx+80]
|
|
|
-        mov     QWORD PTR [rcx+72], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+80]
|
|
|
-        mov     rax, QWORD PTR [rcx+88]
|
|
|
-        mov     QWORD PTR [rcx+80], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+88]
|
|
|
-        mov     r8, QWORD PTR [rcx+96]
|
|
|
-        mov     QWORD PTR [rcx+88], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+96]
|
|
|
-        mov     rax, QWORD PTR [rcx+104]
|
|
|
-        mov     QWORD PTR [rcx+96], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+104]
|
|
|
-        mov     r8, QWORD PTR [rcx+112]
|
|
|
-        mov     QWORD PTR [rcx+104], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+112]
|
|
|
-        mov     rax, QWORD PTR [rcx+120]
|
|
|
-        mov     QWORD PTR [rcx+112], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+120]
|
|
|
-        mov     r8, QWORD PTR [rcx+128]
|
|
|
-        mov     QWORD PTR [rcx+120], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+128]
|
|
|
-        mov     rax, QWORD PTR [rcx+136]
|
|
|
-        mov     QWORD PTR [rcx+128], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+136]
|
|
|
-        mov     r8, QWORD PTR [rcx+144]
|
|
|
-        mov     QWORD PTR [rcx+136], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+144]
|
|
|
-        mov     rax, QWORD PTR [rcx+152]
|
|
|
-        mov     QWORD PTR [rcx+144], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+152]
|
|
|
-        mov     r8, QWORD PTR [rcx+160]
|
|
|
-        mov     QWORD PTR [rcx+152], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+160]
|
|
|
-        mov     rax, QWORD PTR [rcx+168]
|
|
|
-        mov     QWORD PTR [rcx+160], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+168]
|
|
|
-        mov     r8, QWORD PTR [rcx+176]
|
|
|
-        mov     QWORD PTR [rcx+168], rax
|
|
|
-        sbb     r8, QWORD PTR [r10+176]
|
|
|
-        mov     rax, QWORD PTR [rcx+184]
|
|
|
-        mov     QWORD PTR [rcx+176], r8
|
|
|
-        sbb     rax, QWORD PTR [r10+184]
|
|
|
-        mov     QWORD PTR [rcx+184], rax
|
|
|
-        sbb     r9, 0  ; fold the borrow of this subtraction into r9
|
|
|
-        mov     rcx, QWORD PTR [rsp+384]
|
|
|
-        add     rcx, 576  ; rcx -> r[72]
|
|
|
-        ; Add in word: r9 is added to r[72] and the carry is rippled through r[73..95]
|
|
|
-        mov     r8, QWORD PTR [rcx]
|
|
|
-        add     r8, r9
|
|
|
-        mov     rax, QWORD PTR [rcx+8]
|
|
|
-        mov     QWORD PTR [rcx], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+16]
|
|
|
-        mov     QWORD PTR [rcx+8], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+24]
|
|
|
-        mov     QWORD PTR [rcx+16], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+32]
|
|
|
-        mov     QWORD PTR [rcx+24], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+40]
|
|
|
-        mov     QWORD PTR [rcx+32], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+48]
|
|
|
-        mov     QWORD PTR [rcx+40], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+56]
|
|
|
-        mov     QWORD PTR [rcx+48], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+64]
|
|
|
-        mov     QWORD PTR [rcx+56], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+72]
|
|
|
-        mov     QWORD PTR [rcx+64], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+80]
|
|
|
-        mov     QWORD PTR [rcx+72], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+88]
|
|
|
-        mov     QWORD PTR [rcx+80], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+96]
|
|
|
-        mov     QWORD PTR [rcx+88], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+104]
|
|
|
-        mov     QWORD PTR [rcx+96], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+112]
|
|
|
-        mov     QWORD PTR [rcx+104], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+120]
|
|
|
-        mov     QWORD PTR [rcx+112], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+128]
|
|
|
-        mov     QWORD PTR [rcx+120], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+136]
|
|
|
-        mov     QWORD PTR [rcx+128], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+144]
|
|
|
-        mov     QWORD PTR [rcx+136], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+152]
|
|
|
-        mov     QWORD PTR [rcx+144], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+160]
|
|
|
-        mov     QWORD PTR [rcx+152], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+168]
|
|
|
-        mov     QWORD PTR [rcx+160], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     r8, QWORD PTR [rcx+176]
|
|
|
-        mov     QWORD PTR [rcx+168], rax
|
|
|
-        adc     r8, 0
|
|
|
-        mov     rax, QWORD PTR [rcx+184]
|
|
|
-        mov     QWORD PTR [rcx+176], r8
|
|
|
-        adc     rax, 0
|
|
|
-        mov     QWORD PTR [rcx+184], rax
|
|
|
-        mov     rdx, QWORD PTR [rsp+392]  ; reload a into rdx before returning
|
|
|
-        mov     rcx, QWORD PTR [rsp+384]  ; reload r into rcx before returning
|
|
|
-        add     rsp, 400  ; release the scratch area and the two saved slots
|
|
|
-        ret
|
|
|
-sp_3072_sqr_48 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * Karatsuba: ah^2, al^2, (al - ah)^2
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sqr_avx2_48 PROC
|
|
|
- sub rsp, 400
|
|
|
- mov QWORD PTR [rsp+384], rcx
|
|
|
- mov QWORD PTR [rsp+392], rdx
|
|
|
- mov r9, 0
|
|
|
- mov r10, rsp
|
|
|
- lea r11, QWORD PTR [rdx+192]
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- sub rax, QWORD PTR [r11]
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- sbb r8, QWORD PTR [r11+8]
|
|
|
- mov rax, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- sbb rax, QWORD PTR [r11+16]
|
|
|
- mov r8, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- sbb r8, QWORD PTR [r11+24]
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- sbb rax, QWORD PTR [r11+32]
|
|
|
- mov r8, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- sbb r8, QWORD PTR [r11+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- sbb rax, QWORD PTR [r11+48]
|
|
|
- mov r8, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- sbb r8, QWORD PTR [r11+56]
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- sbb rax, QWORD PTR [r11+64]
|
|
|
- mov r8, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- sbb r8, QWORD PTR [r11+72]
|
|
|
- mov rax, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- sbb rax, QWORD PTR [r11+80]
|
|
|
- mov r8, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- sbb r8, QWORD PTR [r11+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- sbb rax, QWORD PTR [r11+96]
|
|
|
- mov r8, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- sbb r8, QWORD PTR [r11+104]
|
|
|
- mov rax, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- sbb rax, QWORD PTR [r11+112]
|
|
|
- mov r8, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- sbb r8, QWORD PTR [r11+120]
|
|
|
- mov rax, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- sbb rax, QWORD PTR [r11+128]
|
|
|
- mov r8, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [r10+128], rax
|
|
|
- sbb r8, QWORD PTR [r11+136]
|
|
|
- mov rax, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [r10+136], r8
|
|
|
- sbb rax, QWORD PTR [r11+144]
|
|
|
- mov r8, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [r10+144], rax
|
|
|
- sbb r8, QWORD PTR [r11+152]
|
|
|
- mov rax, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [r10+152], r8
|
|
|
- sbb rax, QWORD PTR [r11+160]
|
|
|
- mov r8, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [r10+160], rax
|
|
|
- sbb r8, QWORD PTR [r11+168]
|
|
|
- mov rax, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [r10+168], r8
|
|
|
- sbb rax, QWORD PTR [r11+176]
|
|
|
- mov r8, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r10+176], rax
|
|
|
- sbb r8, QWORD PTR [r11+184]
|
|
|
- mov QWORD PTR [r10+184], r8
|
|
|
- sbb r9, 0
|
|
|
- ; Cond Negate
|
|
|
- mov rax, QWORD PTR [r10]
|
|
|
- mov r11, r9
|
|
|
- xor rax, r9
|
|
|
- neg r11
|
|
|
- sub rax, r9
|
|
|
- mov r8, QWORD PTR [r10+8]
|
|
|
- sbb r11, 0
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+16]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+24]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+32]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+40]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+56]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+64]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+72]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+80]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+88]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+104]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+112]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+120]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+128]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+136]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+128], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+144]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+136], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+152]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+144], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+160]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+152], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+168]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+160], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+176]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+168], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+184]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+176], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov QWORD PTR [r10+184], r8
|
|
|
- mov rdx, r10
|
|
|
- mov rcx, rsp
|
|
|
- call sp_3072_sqr_avx2_24
|
|
|
- mov rdx, QWORD PTR [rsp+392]
|
|
|
- mov rcx, QWORD PTR [rsp+384]
|
|
|
- add rdx, 192
|
|
|
- add rcx, 384
|
|
|
- call sp_3072_sqr_avx2_24
|
|
|
- mov rdx, QWORD PTR [rsp+392]
|
|
|
- mov rcx, QWORD PTR [rsp+384]
|
|
|
- call sp_3072_sqr_avx2_24
|
|
|
-IFDEF _WIN64
|
|
|
- mov rdx, QWORD PTR [rsp+392]
|
|
|
- mov rcx, QWORD PTR [rsp+384]
|
|
|
-ENDIF
|
|
|
- mov rdx, QWORD PTR [rsp+384]
|
|
|
- lea r10, QWORD PTR [rsp+192]
|
|
|
- add rdx, 576
|
|
|
- mov r9, 0
|
|
|
- mov r8, QWORD PTR [r10+-192]
|
|
|
- sub r8, QWORD PTR [rdx+-192]
|
|
|
- mov rax, QWORD PTR [r10+-184]
|
|
|
- mov QWORD PTR [r10+-192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-184]
|
|
|
- mov r8, QWORD PTR [r10+-176]
|
|
|
- mov QWORD PTR [r10+-184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-176]
|
|
|
- mov rax, QWORD PTR [r10+-168]
|
|
|
- mov QWORD PTR [r10+-176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-168]
|
|
|
- mov r8, QWORD PTR [r10+-160]
|
|
|
- mov QWORD PTR [r10+-168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-160]
|
|
|
- mov rax, QWORD PTR [r10+-152]
|
|
|
- mov QWORD PTR [r10+-160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-152]
|
|
|
- mov r8, QWORD PTR [r10+-144]
|
|
|
- mov QWORD PTR [r10+-152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-144]
|
|
|
- mov rax, QWORD PTR [r10+-136]
|
|
|
- mov QWORD PTR [r10+-144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-136]
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- mov QWORD PTR [r10+-136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [r10+128]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov rax, QWORD PTR [r10+136]
|
|
|
- mov QWORD PTR [r10+128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [r10+144]
|
|
|
- mov QWORD PTR [r10+136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov rax, QWORD PTR [r10+152]
|
|
|
- mov QWORD PTR [r10+144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [r10+160]
|
|
|
- mov QWORD PTR [r10+152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov QWORD PTR [r10+160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [r10+176]
|
|
|
- mov QWORD PTR [r10+168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov rax, QWORD PTR [r10+184]
|
|
|
- mov QWORD PTR [r10+176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r10+184], rax
|
|
|
- sbb r9, 0
|
|
|
- sub rdx, 384
|
|
|
- mov r8, QWORD PTR [r10+-192]
|
|
|
- sub r8, QWORD PTR [rdx+-192]
|
|
|
- mov rax, QWORD PTR [r10+-184]
|
|
|
- mov QWORD PTR [r10+-192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-184]
|
|
|
- mov r8, QWORD PTR [r10+-176]
|
|
|
- mov QWORD PTR [r10+-184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-176]
|
|
|
- mov rax, QWORD PTR [r10+-168]
|
|
|
- mov QWORD PTR [r10+-176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-168]
|
|
|
- mov r8, QWORD PTR [r10+-160]
|
|
|
- mov QWORD PTR [r10+-168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-160]
|
|
|
- mov rax, QWORD PTR [r10+-152]
|
|
|
- mov QWORD PTR [r10+-160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-152]
|
|
|
- mov r8, QWORD PTR [r10+-144]
|
|
|
- mov QWORD PTR [r10+-152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-144]
|
|
|
- mov rax, QWORD PTR [r10+-136]
|
|
|
- mov QWORD PTR [r10+-144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-136]
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- mov QWORD PTR [r10+-136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [r10+128]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov rax, QWORD PTR [r10+136]
|
|
|
- mov QWORD PTR [r10+128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [r10+144]
|
|
|
- mov QWORD PTR [r10+136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov rax, QWORD PTR [r10+152]
|
|
|
- mov QWORD PTR [r10+144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [r10+160]
|
|
|
- mov QWORD PTR [r10+152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov QWORD PTR [r10+160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [r10+176]
|
|
|
- mov QWORD PTR [r10+168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov rax, QWORD PTR [r10+184]
|
|
|
- mov QWORD PTR [r10+176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r10+184], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+384]
|
|
|
- neg r9
|
|
|
- add rcx, 384
|
|
|
- mov r8, QWORD PTR [rcx+-192]
|
|
|
- sub r8, QWORD PTR [r10+-192]
|
|
|
- mov rax, QWORD PTR [rcx+-184]
|
|
|
- mov QWORD PTR [rcx+-192], r8
|
|
|
- sbb rax, QWORD PTR [r10+-184]
|
|
|
- mov r8, QWORD PTR [rcx+-176]
|
|
|
- mov QWORD PTR [rcx+-184], rax
|
|
|
- sbb r8, QWORD PTR [r10+-176]
|
|
|
- mov rax, QWORD PTR [rcx+-168]
|
|
|
- mov QWORD PTR [rcx+-176], r8
|
|
|
- sbb rax, QWORD PTR [r10+-168]
|
|
|
- mov r8, QWORD PTR [rcx+-160]
|
|
|
- mov QWORD PTR [rcx+-168], rax
|
|
|
- sbb r8, QWORD PTR [r10+-160]
|
|
|
- mov rax, QWORD PTR [rcx+-152]
|
|
|
- mov QWORD PTR [rcx+-160], r8
|
|
|
- sbb rax, QWORD PTR [r10+-152]
|
|
|
- mov r8, QWORD PTR [rcx+-144]
|
|
|
- mov QWORD PTR [rcx+-152], rax
|
|
|
- sbb r8, QWORD PTR [r10+-144]
|
|
|
- mov rax, QWORD PTR [rcx+-136]
|
|
|
- mov QWORD PTR [rcx+-144], r8
|
|
|
- sbb rax, QWORD PTR [r10+-136]
|
|
|
- mov r8, QWORD PTR [rcx+-128]
|
|
|
- mov QWORD PTR [rcx+-136], rax
|
|
|
- sbb r8, QWORD PTR [r10+-128]
|
|
|
- mov rax, QWORD PTR [rcx+-120]
|
|
|
- mov QWORD PTR [rcx+-128], r8
|
|
|
- sbb rax, QWORD PTR [r10+-120]
|
|
|
- mov r8, QWORD PTR [rcx+-112]
|
|
|
- mov QWORD PTR [rcx+-120], rax
|
|
|
- sbb r8, QWORD PTR [r10+-112]
|
|
|
- mov rax, QWORD PTR [rcx+-104]
|
|
|
- mov QWORD PTR [rcx+-112], r8
|
|
|
- sbb rax, QWORD PTR [r10+-104]
|
|
|
- mov r8, QWORD PTR [rcx+-96]
|
|
|
- mov QWORD PTR [rcx+-104], rax
|
|
|
- sbb r8, QWORD PTR [r10+-96]
|
|
|
- mov rax, QWORD PTR [rcx+-88]
|
|
|
- mov QWORD PTR [rcx+-96], r8
|
|
|
- sbb rax, QWORD PTR [r10+-88]
|
|
|
- mov r8, QWORD PTR [rcx+-80]
|
|
|
- mov QWORD PTR [rcx+-88], rax
|
|
|
- sbb r8, QWORD PTR [r10+-80]
|
|
|
- mov rax, QWORD PTR [rcx+-72]
|
|
|
- mov QWORD PTR [rcx+-80], r8
|
|
|
- sbb rax, QWORD PTR [r10+-72]
|
|
|
- mov r8, QWORD PTR [rcx+-64]
|
|
|
- mov QWORD PTR [rcx+-72], rax
|
|
|
- sbb r8, QWORD PTR [r10+-64]
|
|
|
- mov rax, QWORD PTR [rcx+-56]
|
|
|
- mov QWORD PTR [rcx+-64], r8
|
|
|
- sbb rax, QWORD PTR [r10+-56]
|
|
|
- mov r8, QWORD PTR [rcx+-48]
|
|
|
- mov QWORD PTR [rcx+-56], rax
|
|
|
- sbb r8, QWORD PTR [r10+-48]
|
|
|
- mov rax, QWORD PTR [rcx+-40]
|
|
|
- mov QWORD PTR [rcx+-48], r8
|
|
|
- sbb rax, QWORD PTR [r10+-40]
|
|
|
- mov r8, QWORD PTR [rcx+-32]
|
|
|
- mov QWORD PTR [rcx+-40], rax
|
|
|
- sbb r8, QWORD PTR [r10+-32]
|
|
|
- mov rax, QWORD PTR [rcx+-24]
|
|
|
- mov QWORD PTR [rcx+-32], r8
|
|
|
- sbb rax, QWORD PTR [r10+-24]
|
|
|
- mov r8, QWORD PTR [rcx+-16]
|
|
|
- mov QWORD PTR [rcx+-24], rax
|
|
|
- sbb r8, QWORD PTR [r10+-16]
|
|
|
- mov rax, QWORD PTR [rcx+-8]
|
|
|
- mov QWORD PTR [rcx+-16], r8
|
|
|
- sbb rax, QWORD PTR [r10+-8]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- mov QWORD PTR [rcx+-8], rax
|
|
|
- sbb r8, QWORD PTR [r10]
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- sbb rax, QWORD PTR [r10+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- sbb r8, QWORD PTR [r10+16]
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb rax, QWORD PTR [r10+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- sbb r8, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb rax, QWORD PTR [r10+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- sbb r8, QWORD PTR [r10+48]
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb rax, QWORD PTR [r10+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- sbb r8, QWORD PTR [r10+64]
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb rax, QWORD PTR [r10+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- sbb r8, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb rax, QWORD PTR [r10+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- sbb r8, QWORD PTR [r10+96]
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb rax, QWORD PTR [r10+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- sbb r8, QWORD PTR [r10+112]
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb rax, QWORD PTR [r10+120]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- sbb r8, QWORD PTR [r10+128]
|
|
|
- mov rax, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- sbb rax, QWORD PTR [r10+136]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], rax
|
|
|
- sbb r8, QWORD PTR [r10+144]
|
|
|
- mov rax, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- sbb rax, QWORD PTR [r10+152]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], rax
|
|
|
- sbb r8, QWORD PTR [r10+160]
|
|
|
- mov rax, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- sbb rax, QWORD PTR [r10+168]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], rax
|
|
|
- sbb r8, QWORD PTR [r10+176]
|
|
|
- mov rax, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- sbb rax, QWORD PTR [r10+184]
|
|
|
- mov QWORD PTR [rcx+184], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+384]
|
|
|
- add rcx, 576
|
|
|
- ; Add in word
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- add r8, r9
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rcx+184], rax
|
|
|
- mov rdx, QWORD PTR [rsp+392]
|
|
|
- mov rcx, QWORD PTR [rsp+384]
|
|
|
- add rsp, 400
|
|
|
- ret
|
|
|
-sp_3072_sqr_avx2_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision digit.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_d_48 PROC
|
|
|
- push r12
|
|
|
- mov r9, rdx
|
|
|
- ; A[0] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- mov r10, rax
|
|
|
- mov r11, rdx
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- ; A[1] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[16] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+128]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[17] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+136]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[18] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+144]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[19] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+152]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[20] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+160]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[21] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+168]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[22] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+176]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[23] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+184]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[24] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+192]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[25] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+200]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[26] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+208]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[27] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+216]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[28] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+224]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[29] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+232]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[30] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+240]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[31] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+248]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[32] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+256]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+256], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[33] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+264]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+264], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[34] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+272]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+272], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[35] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+280]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+280], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[36] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+288]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+288], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[37] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+296]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+296], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[38] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+304]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+304], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[39] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+312]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+312], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[40] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+320]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+320], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[41] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+328]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+328], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[42] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+336]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+336], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[43] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+344]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+344], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[44] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+352]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+352], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[45] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+360]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+360], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[46] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+368]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+368], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[47] * B
|
|
|
- mov rax, r8
|
|
|
- mul QWORD PTR [r9+376]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- mov QWORD PTR [rcx+376], r12
|
|
|
- mov QWORD PTR [rcx+384], r10
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mul_d_48 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting (r is then a copy of a).
|
|
|
-; *
|
|
|
-; * r A single precision number representing the conditional subtract result (24 words, passed in rcx).
|
|
|
-; * a A single precision number to subtract from (24 words, passed in rdx).
|
|
|
-; * b A single precision number to subtract (24 words, passed in r8).
|
|
|
-; * m Mask value to apply (passed in r9); it is ANDed with every word of b before the subtraction.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_cond_sub_24 PROC
|
|
|
- sub rsp, 192 ; reserve 24 * 8 bytes of stack for the masked copy of b
|
|
|
- mov r10, QWORD PTR [r8] ; phase 1: copy (b[i] AND m) to the stack, two words per step
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov QWORD PTR [rsp+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+112], r10
|
|
|
- mov QWORD PTR [rsp+120], r11
|
|
|
- mov r10, QWORD PTR [r8+128]
|
|
|
- mov r11, QWORD PTR [r8+136]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+128], r10
|
|
|
- mov QWORD PTR [rsp+136], r11
|
|
|
- mov r10, QWORD PTR [r8+144]
|
|
|
- mov r11, QWORD PTR [r8+152]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+144], r10
|
|
|
- mov QWORD PTR [rsp+152], r11
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov r11, QWORD PTR [r8+168]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+160], r10
|
|
|
- mov QWORD PTR [rsp+168], r11
|
|
|
- mov r10, QWORD PTR [r8+176]
|
|
|
- mov r11, QWORD PTR [r8+184]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+176], r10
|
|
|
- mov QWORD PTR [rsp+184], r11
|
|
|
- mov r10, QWORD PTR [rdx] ; phase 2: r[i] = a[i] - masked b[i], borrow carried in CF
|
|
|
- mov r8, QWORD PTR [rsp] ; r8 is reused as scratch; the b pointer is no longer needed
|
|
|
- sub r10, r8 ; word 0 starts the borrow chain; every later word uses sbb
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [rsp+8]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx], r10 ; each result is stored one step late; mov leaves CF untouched
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r8, QWORD PTR [rsp+16]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rsp+24]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov r10, QWORD PTR [rdx+32]
|
|
|
- mov r8, QWORD PTR [rsp+32]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov r11, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rsp+40]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r8, QWORD PTR [rsp+48]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rsp+56]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov r8, QWORD PTR [rsp+64]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov r11, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rsp+72]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+64], r10
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r8, QWORD PTR [rsp+80]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+72], r11
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rsp+88]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- mov r8, QWORD PTR [rsp+96]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rsp+104]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r8, QWORD PTR [rsp+112]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [rsp+120]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov r10, QWORD PTR [rdx+128]
|
|
|
- mov r8, QWORD PTR [rsp+128]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- mov r11, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [rsp+136]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+128], r10
|
|
|
- mov r10, QWORD PTR [rdx+144]
|
|
|
- mov r8, QWORD PTR [rsp+144]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+136], r11
|
|
|
- mov r11, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [rsp+152]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- mov r10, QWORD PTR [rdx+160]
|
|
|
- mov r8, QWORD PTR [rsp+160]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- mov r11, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [rsp+168]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+160], r10
|
|
|
- mov r10, QWORD PTR [rdx+176]
|
|
|
- mov r8, QWORD PTR [rsp+176]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+168], r11
|
|
|
- mov r11, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [rsp+184]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+176], r10
|
|
|
- mov QWORD PTR [rcx+184], r11
|
|
|
- sbb rax, rax ; return value: 0 if no borrow out of the top word, -1 if there was one
|
|
|
- add rsp, 192 ; release the scratch area (rax is already final, so clobbering flags is fine)
|
|
|
- ret
|
|
|
-sp_3072_cond_sub_24 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Reduce the number back to 3072 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place (passed in rcx; words a[0..48] are accessed).
|
|
|
-; * m The single precision number representing the modulus (24 words, passed in rdx).
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n (passed in r8; n is the 64-bit word size here).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mont_reduce_24 PROC
|
|
|
- push r12 ; r12-r15, rdi and rsi are used below and restored before returning
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov r9, rdx ; r9 = m; rdx is overwritten by every mul
|
|
|
- xor rsi, rsi ; rsi = carry out of the top word, carried between iterations
|
|
|
- ; i = 24
|
|
|
- mov r10, 24
|
|
|
- mov r15, QWORD PTR [rcx] ; r15 caches a[i+0], rdi caches a[i+1] across iterations
|
|
|
- mov rdi, QWORD PTR [rcx+8]
|
|
|
-L_3072_mont_reduce_24_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov r13, r15
|
|
|
- imul r13, r8
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- add r15, rax ; only the carry is used; r15 is overwritten in the next step
|
|
|
- adc r12, rdx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- mov r15, rdi ; the new a[i+1] becomes the cached a[i+0] of the next iteration
|
|
|
- add r15, rax
|
|
|
- adc r11, rdx
|
|
|
- add r15, r12
|
|
|
- adc r11, 0
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- mov rdi, QWORD PTR [rcx+16] ; the new a[i+2] becomes the cached a[i+1] of the next iteration
|
|
|
- add rdi, rax
|
|
|
- adc r12, rdx
|
|
|
- add rdi, r11
|
|
|
- adc r12, 0
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- mov r14, QWORD PTR [rcx+24]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+24], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- mov r14, QWORD PTR [rcx+32]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+32], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- mov r14, QWORD PTR [rcx+40]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+40], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- mov r14, QWORD PTR [rcx+48]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- mov r14, QWORD PTR [rcx+56]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- mov r14, QWORD PTR [rcx+64]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+64], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- mov r14, QWORD PTR [rcx+72]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+72], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- mov r14, QWORD PTR [rcx+80]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+80], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- mov r14, QWORD PTR [rcx+88]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+88], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- mov r14, QWORD PTR [rcx+96]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+96], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- mov r14, QWORD PTR [rcx+104]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+104], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- mov r14, QWORD PTR [rcx+112]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+112], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- mov r14, QWORD PTR [rcx+120]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+120], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+16] += m[16] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+128]
|
|
|
- mov r14, QWORD PTR [rcx+128]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+128], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+17] += m[17] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+136]
|
|
|
- mov r14, QWORD PTR [rcx+136]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+136], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+18] += m[18] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+144]
|
|
|
- mov r14, QWORD PTR [rcx+144]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+144], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+19] += m[19] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+152]
|
|
|
- mov r14, QWORD PTR [rcx+152]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+152], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+20] += m[20] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+160]
|
|
|
- mov r14, QWORD PTR [rcx+160]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+160], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+21] += m[21] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+168]
|
|
|
- mov r14, QWORD PTR [rcx+168]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+168], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+22] += m[22] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+176]
|
|
|
- mov r14, QWORD PTR [rcx+176]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+176], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+23] += m[23] * mu
|
|
|
- mov rax, r13
|
|
|
- mul QWORD PTR [r9+184]
|
|
|
- mov r14, QWORD PTR [rcx+184]
|
|
|
- add r12, rax
|
|
|
- adc rdx, rsi ; fold in the top-word carry saved by the previous iteration
|
|
|
- mov rsi, 0 ; mov (not xor) so that CF from the adc above survives
|
|
|
- adc rsi, 0
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+184], r14
|
|
|
- adc QWORD PTR [rcx+192], rdx ; a[i+24] += high word + carry
|
|
|
- adc rsi, 0 ; remember any carry out of a[i+24] for the next iteration
|
|
|
- ; i -= 1 (rcx advances one word, so [rcx] is always a[i+0])
|
|
|
- add rcx, 8
|
|
|
- dec r10
|
|
|
- jnz L_3072_mont_reduce_24_loop
|
|
|
- mov QWORD PTR [rcx], r15 ; write back the two words that were cached in registers
|
|
|
- mov QWORD PTR [rcx+8], rdi
|
|
|
- neg rsi ; turn the final carry (0 or 1) into the mask 0 or -1
|
|
|
- ; Arguments for sp_3072_cond_sub_24(r, a, b, m). r8 must be loaded from r9
|
|
|
|
|
|
|
|
|
- ; before r9 is overwritten with the mask, so there is exactly one valid order.
|
|
|
- mov r8, r9 ; b = m (the modulus)
|
|
|
- mov r9, rsi ; m = mask derived from the carry
|
|
|
- mov rdx, rcx ; a = upper 24 words (rcx has advanced by 24 * 8 = 192 bytes)
|
|
|
- sub rcx, 192 ; r = start of the original a
|
|
|
- call sp_3072_cond_sub_24
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mont_reduce_24 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting (r is then a copy of a).
|
|
|
-; *
|
|
|
-; * r A single precision number representing the conditional subtract result (24 words, passed in rcx).
|
|
|
-; * a A single precision number to subtract from (24 words, passed in rdx).
|
|
|
-; * b A single precision number to subtract (24 words, passed in r8).
|
|
|
-; * m Mask value to apply (passed in r9); used as the pext mask on every word of b.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_cond_sub_avx2_24 PROC
|
|
|
- push r12 ; r12 is used as a third rotating scratch register and restored at the end
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- pext r12, r12, r9 ; with m = -1 this keeps b[i]; with m = 0 it yields 0; pext does not modify flags
|
|
|
- sub r10, r12 ; word 0 starts the borrow chain; every later word uses sbb
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx], r10 ; each result is stored one step late; mov leaves CF untouched
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+56]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+104]
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+128]
|
|
|
- mov r11, QWORD PTR [rdx+128]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+136]
|
|
|
- mov r12, QWORD PTR [rdx+136]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+144]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+152]
|
|
|
- mov r11, QWORD PTR [rdx+152]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov r12, QWORD PTR [rdx+160]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+168]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+176]
|
|
|
- mov r11, QWORD PTR [rdx+176]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+184]
|
|
|
- mov r12, QWORD PTR [rdx+184]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- sbb r12, r10
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- sbb rax, rax ; return value: 0 if no borrow out of the top word, -1 if there was one
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_cond_sub_avx2_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer (passed in rcx; 25 words r[0..24] are written).
|
|
|
-; * a A single precision integer (passed in rdx; 24 words a[0..23] are read).
|
|
|
-; * b A single precision digit (one 64-bit word, passed in r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_d_24 PROC
|
|
|
- push r12 ; r12 is the third accumulator register and is restored before returning
|
|
|
- mov r9, rdx ; r9 = a; rdx is overwritten by every mul
|
|
|
- ; A[0] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- mov r10, rax
|
|
|
- mov r11, rdx
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- ; A[1] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10 ; r10, r11 and r12 rotate as the low / high / overflow accumulator words
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+8], r11 ; mov leaves CF untouched, so the adc below still sees the add's carry
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[16] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+128]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[17] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+136]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[18] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+144]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[19] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+152]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[20] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+160]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[21] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+168]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[22] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+176]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[23] * B
|
|
|
- mov rax, r8
|
|
|
- mul QWORD PTR [r9+184]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx ; last product: its high word plus carry becomes the extra top word r[24]
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mul_d_24 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer (passed in rcx; 25 words r[0..24] are written).
|
|
|
-; * a A single precision integer (passed in rdx; 24 words a[0..23] are read).
|
|
|
-; * b A single precision digit (one 64-bit word, passed in r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_d_avx2_24 PROC
|
|
|
- push r12 ; r12 and r13 are used below and restored before returning
|
|
|
- push r13
|
|
|
- mov rax, rdx ; rax = a; rdx is needed as the implicit mulx multiplicand
|
|
|
- ; A[0] * B
|
|
|
- mov rdx, r8 ; rdx = b for every mulx below
|
|
|
- xor r13, r13 ; r13 = 0 constant; the xor also clears CF and OF for the adcx/adox chains
|
|
|
- mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b (mulx does not modify flags)
|
|
|
- mov QWORD PTR [rcx], r11
|
|
|
- ; A[1] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, r13 ; fresh zeroed accumulator for the next word
|
|
|
- adcx r12, r9 ; low half is added on the CF chain
|
|
|
- adox r11, r10 ; high half is added on the OF chain
|
|
|
- mov QWORD PTR [rcx+8], r12
|
|
|
- ; A[2] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- ; A[3] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- ; A[4] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- ; A[5] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- ; A[6] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+48]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- ; A[7] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+56]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- ; A[8] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+64]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- ; A[9] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+72]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+72], r12
|
|
|
- ; A[10] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+80]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- ; A[11] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+88]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- ; A[12] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+96]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+96], r11
|
|
|
- ; A[13] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+104]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+104], r12
|
|
|
- ; A[14] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+112]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- ; A[15] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+120]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- ; A[16] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+128]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- ; A[17] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+136]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- ; A[18] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+144]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+144], r11
|
|
|
- ; A[19] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+152]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+152], r12
|
|
|
- ; A[20] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+160]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+160], r11
|
|
|
- ; A[21] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+168]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+168], r12
|
|
|
- ; A[22] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+176]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- ; A[23] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+184]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- adcx r11, r13 ; add the final CF-chain carry into the top word (r13 is 0)
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- mov QWORD PTR [rcx+192], r11
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mul_d_avx2_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF _WIN64
|
|
|
-; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
|
|
|
-; *
|
|
|
-; * d1 The high order half of the number to divide (passed in rcx).
|
|
|
-; * d0 The low order half of the number to divide (passed in rdx).
|
|
|
-; * div The divisor (passed in r8).
|
|
|
-; * returns the quotient of the division in rax; the remainder left in rdx is not part of the result.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-div_3072_word_asm_24 PROC
|
|
|
- mov r9, rdx ; save d0; rdx is about to receive d1
|
|
|
- mov rax, r9 ; rax = d0, the low half of the dividend
|
|
|
- mov rdx, rcx ; rdx = d1, the high half of the dividend
|
|
|
- div r8 ; unsigned rdx:rax / r8; the CPU raises #DE if div is 0 or the quotient exceeds 64 bits
|
|
|
- ret
|
|
|
-div_3072_word_asm_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Compare a with b in constant time.
|
|
|
-; *
|
|
|
-; * All 24 64-bit words of both numbers are always read, from the most
|
|
|
-; * significant (byte offset 184) down to the least significant (offset 0),
|
|
|
-; * and the result is selected with conditional moves only - no branches.
|
|
|
-; *
|
|
|
-; * Registers: rcx = a, rdx = b (Microsoft x64 ABI).
|
|
|
-; *   r8  mask: all ones until the first differing word is seen, zero after.
|
|
|
-; *   r9  constant 0, used to clear the mask.
|
|
|
-; *   r10 constant 1, the "a is greater" result.
|
|
|
-; *   rax running result; starts at -1 and is xor'd with the mask at the end
|
|
|
-; *       so that "never differed" (-1 xor -1) becomes 0.
|
|
|
-; *   r11, r12 scratch for the masked words (r12 is saved and restored).
|
|
|
-; *
|
|
|
-; * a  A single precision integer.
|
|
|
-; * b  A single precision integer.
|
|
|
-; * return -ve, 0 or +ve if a is less than, equal to or greater than b
|
|
|
-; * respectively.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_cmp_24 PROC
|
|
|
-        push    r12
|
|
|
-        mov     rax, -1
|
|
|
-        mov     r8, -1
|
|
|
-        mov     r10, 1
|
|
|
-        xor     r9, r9
|
|
|
-; One masked compare step per word; the equate walks the byte offset 184 -> 0.
|
|
|
-sp_3072_cmp_24_ofs = 184
|
|
|
-REPT 24
|
|
|
-        mov     r11, QWORD PTR [rcx+sp_3072_cmp_24_ofs]
|
|
|
-        mov     r12, QWORD PTR [rdx+sp_3072_cmp_24_ofs]
|
|
|
-        and     r11, r8                 ; both words become 0 once a difference was found
|
|
|
-        and     r12, r8
|
|
|
-        sub     r11, r12
|
|
|
-        cmova   rax, r10                ; a word above b word: result = 1
|
|
|
-        cmovc   rax, r8                 ; a word below b word: result = mask (-1 here)
|
|
|
-        cmovnz  r8, r9                  ; words differ: clear the mask for the rest
|
|
|
-sp_3072_cmp_24_ofs = sp_3072_cmp_24_ofs - 8
|
|
|
-ENDM
|
|
|
-        xor     rax, r8                 ; equal throughout: -1 xor -1 = 0; otherwise mask is 0
|
|
|
-        pop     r12
|
|
|
-        ret
|
|
|
-sp_3072_cmp_24 ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_get_from_table_24 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- pxor xmm13, xmm13
|
|
|
- pshufd xmm11, xmm11, 0
|
|
|
- pshufd xmm10, xmm10, 0
|
|
|
- ; START: 0-7
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 0-7
|
|
|
- ; START: 8-15
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 8-15
|
|
|
- ; START: 16-23
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- ; END: 16-23
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_3072_get_from_table_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 3072 bits using Montgomery reduction (24 words of 64 bits; this variant uses mulx, adcx/adox and pext).
|
|
|
-; * Reads a[0..47] and m[0..23]; the reduced value is written to a[0..23]. a[5..47] are also overwritten as scratch. m is subtracted once, and only when the reduction carries out of the top word.
|
|
|
-; * a A single precision number to reduce in place. (Taken from rcx.)
|
|
|
-; * m The single precision number representing the modulus. (Taken from rdx.)
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n. (Taken from r8; presumably n = 64, the word size - TODO confirm.)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mont_reduce_avx2_24 PROC
|
|
|
- push r12 ; save every non-argument register the routine overwrites; restored in reverse order before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov r9, rcx ; r9 = a (rcx and rdx are needed below as mulx operands)
|
|
|
- mov r10, rdx ; r10 = m
|
|
|
- xor rbp, rbp ; rbp = carry brought in from the previous outer iteration (none yet)
|
|
|
- ; i = 24 (r11 counts the remaining outer iterations, one per word of m)
|
|
|
- mov r11, 24
|
|
|
- mov r14, QWORD PTR [r9] ; r14, r15, rdi, rsi = a[0..3]: a four-word window kept in registers
|
|
|
- mov r15, QWORD PTR [r9+8]
|
|
|
- mov rdi, QWORD PTR [r9+16]
|
|
|
- mov rsi, QWORD PTR [r9+24]
|
|
|
- add r9, 96 ; bias the pointer: the loop addresses a[i+4]..a[i+24] as [r9-64]..[r9+96]
|
|
|
- xor rbp, rbp ; rbp is already zero from the clear above; this second clear is redundant
|
|
|
-L_3072_mont_reduce_avx2_24_loop:
|
|
|
- ; mu = a[i] * mp (low 64 bits only; a[i] is the bottom of the register window, r14)
|
|
|
- mov rdx, r14 ; rdx is the implicit multiplicand of every mulx below
|
|
|
- mov r12, r14 ; r12 = a[i], the accumulator for the first column
|
|
|
- imul rdx, r8
|
|
|
- xor rbx, rbx ; rbx = 0, and CF and OF are both cleared to start the two carry chains
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10] ; rcx:rax = m[0] * mu (mulx leaves the flags alone)
|
|
|
- mov r14, r15 ; slide the window: a[i+1] becomes next iteration's a[i]
|
|
|
- adcx r12, rax ; low halves are added on the CF chain; this sum is never stored, only its carry is used
|
|
|
- adox r14, rcx ; high halves are added into the next word on the OF chain
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-64] ; refill the top of the window with a[i+4] from memory
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu (result stays in rsi; it is not written back to memory)
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-56]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu (from here on r12 and r13 alternate as accumulator and each finished word is stored back)
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-56], r13
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+-40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-48], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+-32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-40], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+-24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-32], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+-16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-24], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+-8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-16], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-8], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+16] += m[16] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+128]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- ; a[i+17] += m[17] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+136]
|
|
|
- mov r12, QWORD PTR [r9+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- ; a[i+18] += m[18] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+144]
|
|
|
- mov r13, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+48], r12
|
|
|
- ; a[i+19] += m[19] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+152]
|
|
|
- mov r12, QWORD PTR [r9+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+56], r13
|
|
|
- ; a[i+20] += m[20] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+160]
|
|
|
- mov r13, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+64], r12
|
|
|
- ; a[i+21] += m[21] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+168]
|
|
|
- mov r12, QWORD PTR [r9+80]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+72], r13
|
|
|
- ; a[i+22] += m[22] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+176]
|
|
|
- mov r13, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+80], r12
|
|
|
- ; a[i+23] += m[23] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+184]
|
|
|
- mov r12, QWORD PTR [r9+96] ; r12 = a[i+24], the word that absorbs the final high half and the carries
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+88], r13
|
|
|
- adcx r12, rbp ; fold in the carry saved by the previous outer iteration, plus CF
|
|
|
- mov rbp, rbx ; rbp = 0 (mov does not touch the pending CF and OF)
|
|
|
- mov QWORD PTR [r9+96], r12
|
|
|
- adox rbp, rbx ; rbp += OF
|
|
|
- adcx rbp, rbx ; rbp += CF: rbp now holds the carry out of a[i+24] for the next iteration
|
|
|
- ; a += 1
|
|
|
- add r9, 8
|
|
|
- ; i -= 1
|
|
|
- sub r11, 1
|
|
|
- jnz L_3072_mont_reduce_avx2_24_loop
|
|
|
- sub r9, 96 ; undo the +96 bias: after 24 steps of 8 bytes r9 = &a[24]
|
|
|
- neg rbp ; final carry -> mask: 0 stays 0, 1 becomes all ones (carry is expected to be 0 or 1)
|
|
|
- mov r8, r9 ; r8 = &a[24], the source of the result words (mp is no longer needed)
|
|
|
- sub r9, 192 ; r9 = &a[0], the destination
|
|
|
- mov rcx, QWORD PTR [r10]
|
|
|
- mov rdx, r14 ; a[24..27] are still in the register window r14, r15, rdi, rsi
|
|
|
- pext rcx, rcx, rbp ; all-ones mask returns m[0] unchanged, zero mask returns 0: a branch-free select
|
|
|
- sub rdx, rcx ; starts the borrow chain; only mov, pext and sbb follow, and mov/pext leave CF untouched
|
|
|
- mov rcx, QWORD PTR [r10+8]
|
|
|
- mov rax, r15
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9], rdx ; a[0] = a[24] - (m[0] or 0)
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+16]
|
|
|
- mov rcx, rdi
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+8], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov rdx, rsi
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+16], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [r8+32] ; from a[28] onwards the words come from memory
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+24], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+40] ; the same load / select / store / sbb pattern repeats for words 5..23, rotating rax, rcx, rdx
|
|
|
- mov rcx, QWORD PTR [r8+40]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+32], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- mov rdx, QWORD PTR [r8+48]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+40], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+56]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+48], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+64]
|
|
|
- mov rcx, QWORD PTR [r8+64]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+56], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov rdx, QWORD PTR [r8+72]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+64], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+72], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+88]
|
|
|
- mov rcx, QWORD PTR [r8+88]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+80], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- mov rdx, QWORD PTR [r8+96]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+88], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+104]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+96], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+112]
|
|
|
- mov rcx, QWORD PTR [r8+112]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+104], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov rdx, QWORD PTR [r8+120]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+112], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+128]
|
|
|
- mov rax, QWORD PTR [r8+128]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+120], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+136]
|
|
|
- mov rcx, QWORD PTR [r8+136]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+128], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+144]
|
|
|
- mov rdx, QWORD PTR [r8+144]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+136], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+152]
|
|
|
- mov rax, QWORD PTR [r8+152]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+144], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+160]
|
|
|
- mov rcx, QWORD PTR [r8+160]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+152], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov rdx, QWORD PTR [r8+168]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+160], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+176]
|
|
|
- mov rax, QWORD PTR [r8+176]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+168], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+184]
|
|
|
- mov rcx, QWORD PTR [r8+184]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+176], rax
|
|
|
- sbb rcx, rdx ; the borrow out of the top word is discarded
|
|
|
- mov QWORD PTR [r9+184], rcx
|
|
|
- pop rbp ; restore the saved registers in reverse push order
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mont_reduce_avx2_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_get_from_table_avx2_24 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpermd ymm10, ymm13, ymm10
|
|
|
- vpermd ymm11, ymm13, ymm11
|
|
|
- ; START: 0-15
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- add rcx, 128
|
|
|
- ; END: 0-15
|
|
|
- ; START: 16-23
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 16
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 17
|
|
|
- mov r9, QWORD PTR [rdx+136]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 18
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 19
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 20
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 21
|
|
|
- mov r9, QWORD PTR [rdx+168]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 22
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 23
|
|
|
- mov r9, QWORD PTR [rdx+184]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 24
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 25
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 26
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 27
|
|
|
- mov r9, QWORD PTR [rdx+216]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 28
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 29
|
|
|
- mov r9, QWORD PTR [rdx+232]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 30
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 31
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- ; END: 16-23
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_3072_get_from_table_avx2_24 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting.
|
|
|
-; *
|
|
|
-; * r A single precision number representing the conditional subtract result.
|
|
|
-; * a A single precision number to subtract from.
|
|
|
-; * b A single precision number to subtract.
|
|
|
-; * m Mask value to apply.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_cond_sub_48 PROC
|
|
|
- sub rsp, 384
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov QWORD PTR [rsp+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+112], r10
|
|
|
- mov QWORD PTR [rsp+120], r11
|
|
|
- mov r10, QWORD PTR [r8+128]
|
|
|
- mov r11, QWORD PTR [r8+136]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+128], r10
|
|
|
- mov QWORD PTR [rsp+136], r11
|
|
|
- mov r10, QWORD PTR [r8+144]
|
|
|
- mov r11, QWORD PTR [r8+152]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+144], r10
|
|
|
- mov QWORD PTR [rsp+152], r11
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov r11, QWORD PTR [r8+168]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+160], r10
|
|
|
- mov QWORD PTR [rsp+168], r11
|
|
|
- mov r10, QWORD PTR [r8+176]
|
|
|
- mov r11, QWORD PTR [r8+184]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+176], r10
|
|
|
- mov QWORD PTR [rsp+184], r11
|
|
|
- mov r10, QWORD PTR [r8+192]
|
|
|
- mov r11, QWORD PTR [r8+200]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+192], r10
|
|
|
- mov QWORD PTR [rsp+200], r11
|
|
|
- mov r10, QWORD PTR [r8+208]
|
|
|
- mov r11, QWORD PTR [r8+216]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+208], r10
|
|
|
- mov QWORD PTR [rsp+216], r11
|
|
|
- mov r10, QWORD PTR [r8+224]
|
|
|
- mov r11, QWORD PTR [r8+232]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+224], r10
|
|
|
- mov QWORD PTR [rsp+232], r11
|
|
|
- mov r10, QWORD PTR [r8+240]
|
|
|
- mov r11, QWORD PTR [r8+248]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+240], r10
|
|
|
- mov QWORD PTR [rsp+248], r11
|
|
|
- mov r10, QWORD PTR [r8+256]
|
|
|
- mov r11, QWORD PTR [r8+264]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+256], r10
|
|
|
- mov QWORD PTR [rsp+264], r11
|
|
|
- mov r10, QWORD PTR [r8+272]
|
|
|
- mov r11, QWORD PTR [r8+280]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+272], r10
|
|
|
- mov QWORD PTR [rsp+280], r11
|
|
|
- mov r10, QWORD PTR [r8+288]
|
|
|
- mov r11, QWORD PTR [r8+296]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+288], r10
|
|
|
- mov QWORD PTR [rsp+296], r11
|
|
|
- mov r10, QWORD PTR [r8+304]
|
|
|
- mov r11, QWORD PTR [r8+312]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+304], r10
|
|
|
- mov QWORD PTR [rsp+312], r11
|
|
|
- mov r10, QWORD PTR [r8+320]
|
|
|
- mov r11, QWORD PTR [r8+328]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+320], r10
|
|
|
- mov QWORD PTR [rsp+328], r11
|
|
|
- mov r10, QWORD PTR [r8+336]
|
|
|
- mov r11, QWORD PTR [r8+344]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+336], r10
|
|
|
- mov QWORD PTR [rsp+344], r11
|
|
|
- mov r10, QWORD PTR [r8+352]
|
|
|
- mov r11, QWORD PTR [r8+360]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+352], r10
|
|
|
- mov QWORD PTR [rsp+360], r11
|
|
|
- mov r10, QWORD PTR [r8+368]
|
|
|
- mov r11, QWORD PTR [r8+376]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+368], r10
|
|
|
- mov QWORD PTR [rsp+376], r11
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- mov r8, QWORD PTR [rsp]
|
|
|
- sub r10, r8
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [rsp+8]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r8, QWORD PTR [rsp+16]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rsp+24]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov r10, QWORD PTR [rdx+32]
|
|
|
- mov r8, QWORD PTR [rsp+32]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov r11, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rsp+40]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r8, QWORD PTR [rsp+48]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rsp+56]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov r8, QWORD PTR [rsp+64]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov r11, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rsp+72]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+64], r10
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r8, QWORD PTR [rsp+80]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+72], r11
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rsp+88]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- mov r8, QWORD PTR [rsp+96]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rsp+104]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r8, QWORD PTR [rsp+112]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [rsp+120]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov r10, QWORD PTR [rdx+128]
|
|
|
- mov r8, QWORD PTR [rsp+128]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- mov r11, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [rsp+136]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+128], r10
|
|
|
- mov r10, QWORD PTR [rdx+144]
|
|
|
- mov r8, QWORD PTR [rsp+144]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+136], r11
|
|
|
- mov r11, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [rsp+152]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- mov r10, QWORD PTR [rdx+160]
|
|
|
- mov r8, QWORD PTR [rsp+160]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- mov r11, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [rsp+168]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+160], r10
|
|
|
- mov r10, QWORD PTR [rdx+176]
|
|
|
- mov r8, QWORD PTR [rsp+176]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+168], r11
|
|
|
- mov r11, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [rsp+184]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+176], r10
|
|
|
- mov r10, QWORD PTR [rdx+192]
|
|
|
- mov r8, QWORD PTR [rsp+192]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+184], r11
|
|
|
- mov r11, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [rsp+200]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- mov r10, QWORD PTR [rdx+208]
|
|
|
- mov r8, QWORD PTR [rsp+208]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- mov r11, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [rsp+216]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+208], r10
|
|
|
- mov r10, QWORD PTR [rdx+224]
|
|
|
- mov r8, QWORD PTR [rsp+224]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+216], r11
|
|
|
- mov r11, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [rsp+232]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+224], r10
|
|
|
- mov r10, QWORD PTR [rdx+240]
|
|
|
- mov r8, QWORD PTR [rsp+240]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+232], r11
|
|
|
- mov r11, QWORD PTR [rdx+248]
|
|
|
- mov r8, QWORD PTR [rsp+248]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- mov r10, QWORD PTR [rdx+256]
|
|
|
- mov r8, QWORD PTR [rsp+256]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- mov r11, QWORD PTR [rdx+264]
|
|
|
- mov r8, QWORD PTR [rsp+264]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+256], r10
|
|
|
- mov r10, QWORD PTR [rdx+272]
|
|
|
- mov r8, QWORD PTR [rsp+272]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+264], r11
|
|
|
- mov r11, QWORD PTR [rdx+280]
|
|
|
- mov r8, QWORD PTR [rsp+280]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+272], r10
|
|
|
- mov r10, QWORD PTR [rdx+288]
|
|
|
- mov r8, QWORD PTR [rsp+288]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+280], r11
|
|
|
- mov r11, QWORD PTR [rdx+296]
|
|
|
- mov r8, QWORD PTR [rsp+296]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+288], r10
|
|
|
- mov r10, QWORD PTR [rdx+304]
|
|
|
- mov r8, QWORD PTR [rsp+304]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+296], r11
|
|
|
- mov r11, QWORD PTR [rdx+312]
|
|
|
- mov r8, QWORD PTR [rsp+312]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+304], r10
|
|
|
- mov r10, QWORD PTR [rdx+320]
|
|
|
- mov r8, QWORD PTR [rsp+320]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+312], r11
|
|
|
- mov r11, QWORD PTR [rdx+328]
|
|
|
- mov r8, QWORD PTR [rsp+328]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+320], r10
|
|
|
- mov r10, QWORD PTR [rdx+336]
|
|
|
- mov r8, QWORD PTR [rsp+336]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+328], r11
|
|
|
- mov r11, QWORD PTR [rdx+344]
|
|
|
- mov r8, QWORD PTR [rsp+344]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+336], r10
|
|
|
- mov r10, QWORD PTR [rdx+352]
|
|
|
- mov r8, QWORD PTR [rsp+352]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+344], r11
|
|
|
- mov r11, QWORD PTR [rdx+360]
|
|
|
- mov r8, QWORD PTR [rsp+360]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+352], r10
|
|
|
- mov r10, QWORD PTR [rdx+368]
|
|
|
- mov r8, QWORD PTR [rsp+368]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+360], r11
|
|
|
- mov r11, QWORD PTR [rdx+376]
|
|
|
- mov r8, QWORD PTR [rsp+376]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+368], r10
|
|
|
- mov QWORD PTR [rcx+376], r11
|
|
|
- sbb rax, rax
|
|
|
- add rsp, 384
|
|
|
- ret
|
|
|
-sp_3072_cond_sub_48 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Reduce the number back to 3072 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; *
|
|
|
-; * Register arguments on entry: rcx = a, rdx = m, r8 = mp.
|
|
|
-; * Register use inside the loop:
|
|
|
-; *   r9       m (copied from rdx, which mul overwrites)
|
|
|
-; *   r10      outer iterations left, counted down from 48
|
|
|
-; *   r13      mu for the current iteration
|
|
|
-; *   r15, rdi a[i] and a[i+1], kept in registers between iterations
|
|
|
-; *   r11, r12 carry word, alternating from limb to limb
|
|
|
-; *   r14      scratch for the limb being updated
|
|
|
-; *   rsi      carry out of the top limb, fed into the next iteration
|
|
|
-; * The routine finishes by calling sp_3072_cond_sub_48 with
|
|
|
-; * r = the original a, a = a + 48 words, b = m and mask = -rsi.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mont_reduce_48 PROC
|
|
|
-        push    r12
|
|
|
-        push    r13
|
|
|
-        push    r14
|
|
|
-        push    r15
|
|
|
-        push    rdi
|
|
|
-        push    rsi
|
|
|
-        ; Keep m in r9: every mul below overwrites rdx.
|
|
|
-        mov     r9, rdx
|
|
|
-        ; No carry into the top limb yet.
|
|
|
-        xor     rsi, rsi
|
|
|
-        ; i = 48
|
|
|
-        mov     r10, 48
|
|
|
-        mov     r15, QWORD PTR [rcx]
|
|
|
-        mov     rdi, QWORD PTR [rcx+8]
|
|
|
-L_3072_mont_reduce_48_loop:
|
|
|
-        ; mu = a[i] * mp
|
|
|
-        mov     r13, r15
|
|
|
-        imul    r13, r8
|
|
|
-        ; a[i+0] += m[0] * mu (only the carry in r12 is used afterwards)
|
|
|
-        mov     rax, r13
|
|
|
-        xor     r12, r12
|
|
|
-        mul     QWORD PTR [r9]
|
|
|
-        add     r15, rax
|
|
|
-        adc     r12, rdx
|
|
|
-        ; a[i+1] += m[1] * mu (result stays in r15 as the next a[i])
|
|
|
-        mov     rax, r13
|
|
|
-        xor     r11, r11
|
|
|
-        mul     QWORD PTR [r9+8]
|
|
|
-        mov     r15, rdi
|
|
|
-        add     r15, rax
|
|
|
-        adc     r11, rdx
|
|
|
-        add     r15, r12
|
|
|
-        adc     r11, 0
|
|
|
-        ; a[i+2] += m[2] * mu (result stays in rdi as the next a[i+1])
|
|
|
-        mov     rax, r13
|
|
|
-        xor     r12, r12
|
|
|
-        mul     QWORD PTR [r9+16]
|
|
|
-        mov     rdi, QWORD PTR [rcx+16]
|
|
|
-        add     rdi, rax
|
|
|
-        adc     r12, rdx
|
|
|
-        add     rdi, r11
|
|
|
-        adc     r12, 0
|
|
|
-        ; a[i+3] .. a[i+46] += m[3] .. m[46] * mu
|
|
|
-        ; Two limbs per repetition because the carry word alternates
|
|
|
-        ; between r12 and r11. The first repetition consumes the carry
|
|
|
-        ; left in r12 by a[i+2]; the last leaves it in r12 for a[i+47].
|
|
|
-L_3072_mont_reduce_48_off = 24
|
|
|
-        REPT    22
|
|
|
-        ; odd limb: carry in r12, carry out r11
|
|
|
-        mov     rax, r13
|
|
|
-        xor     r11, r11
|
|
|
-        mul     QWORD PTR [r9+L_3072_mont_reduce_48_off]
|
|
|
-        mov     r14, QWORD PTR [rcx+L_3072_mont_reduce_48_off]
|
|
|
-        add     r14, rax
|
|
|
-        adc     r11, rdx
|
|
|
-        add     r14, r12
|
|
|
-        mov     QWORD PTR [rcx+L_3072_mont_reduce_48_off], r14
|
|
|
-        adc     r11, 0
|
|
|
-        ; even limb: carry in r11, carry out r12
|
|
|
-        mov     rax, r13
|
|
|
-        xor     r12, r12
|
|
|
-        mul     QWORD PTR [r9+L_3072_mont_reduce_48_off+8]
|
|
|
-        mov     r14, QWORD PTR [rcx+L_3072_mont_reduce_48_off+8]
|
|
|
-        add     r14, rax
|
|
|
-        adc     r12, rdx
|
|
|
-        add     r14, r11
|
|
|
-        mov     QWORD PTR [rcx+L_3072_mont_reduce_48_off+8], r14
|
|
|
-        adc     r12, 0
|
|
|
-L_3072_mont_reduce_48_off = L_3072_mont_reduce_48_off + 16
|
|
|
-        ENDM
|
|
|
-        ; a[i+47] += m[47] * mu, plus the carry saved in rsi
|
|
|
-        mov     rax, r13
|
|
|
-        mul     QWORD PTR [r9+376]
|
|
|
-        mov     r14, QWORD PTR [rcx+376]
|
|
|
-        add     r12, rax
|
|
|
-        adc     rdx, rsi
|
|
|
-        ; mov rather than xor: CF from the adc above is still needed.
|
|
|
-        mov     rsi, 0
|
|
|
-        adc     rsi, 0
|
|
|
-        add     r14, r12
|
|
|
-        mov     QWORD PTR [rcx+376], r14
|
|
|
-        adc     QWORD PTR [rcx+384], rdx
|
|
|
-        adc     rsi, 0
|
|
|
-        ; i -= 1
|
|
|
-        add     rcx, 8
|
|
|
-        dec     r10
|
|
|
-        jnz     L_3072_mont_reduce_48_loop
|
|
|
-        ; Write back the two limbs that were carried in registers.
|
|
|
-        mov     QWORD PTR [rcx], r15
|
|
|
-        mov     QWORD PTR [rcx+8], rdi
|
|
|
-        ; rsi is 0 or 1 here; negate it into a 0 / all-ones mask.
|
|
|
-        neg     rsi
|
|
|
-        ; Arguments for sp_3072_cond_sub_48: rcx = r, rdx = a, r8 = b,
|
|
|
-        ; r9 = mask. r8 is loaded first because r9 still holds m and is
|
|
|
-        ; about to be overwritten with the mask.
|
|
|
-        mov     r8, r9
|
|
|
-        mov     r9, rsi
|
|
|
-        ; rcx has advanced 48 words: it is the source, rcx - 384 the result.
|
|
|
-        mov     rdx, rcx
|
|
|
-        sub     rcx, 384
|
|
|
-        call    sp_3072_cond_sub_48
|
|
|
-        pop     rsi
|
|
|
-        pop     rdi
|
|
|
-        pop     r15
|
|
|
-        pop     r14
|
|
|
-        pop     r13
|
|
|
-        pop     r12
|
|
|
-        ret
|
|
|
-sp_3072_mont_reduce_48 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into r. (r = a - b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; *
|
|
|
-; * Register arguments on entry: rcx = r, rdx = a, r8 = b.
|
|
|
-; * All 48 words are subtracted in one borrow chain, lowest word first.
|
|
|
-; * On return rax is 0 when no borrow left the top word and -1 otherwise.
|
|
|
-; * r9 and r10 are used as scratch.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_sub_48 PROC
|
|
|
-        ; Word 0 starts the borrow chain with sub; the rest use sbb.
|
|
|
-        mov     r9, QWORD PTR [rdx]
|
|
|
-        sub     r9, QWORD PTR [r8]
|
|
|
-        ; Words 1 .. 46, two per repetition. Each result is stored only
|
|
|
-        ; after the next word of a has been loaded, so the value being
|
|
|
-        ; held alternates between r9 and r10. mov leaves the borrow in
|
|
|
-        ; CF untouched between the sbb instructions.
|
|
|
-L_3072_sub_48_off = 8
|
|
|
-        REPT    23
|
|
|
-        mov     r10, QWORD PTR [rdx+L_3072_sub_48_off]
|
|
|
-        mov     QWORD PTR [rcx+L_3072_sub_48_off-8], r9
|
|
|
-        sbb     r10, QWORD PTR [r8+L_3072_sub_48_off]
|
|
|
-        mov     r9, QWORD PTR [rdx+L_3072_sub_48_off+8]
|
|
|
-        mov     QWORD PTR [rcx+L_3072_sub_48_off], r10
|
|
|
-        sbb     r9, QWORD PTR [r8+L_3072_sub_48_off+8]
|
|
|
-L_3072_sub_48_off = L_3072_sub_48_off + 16
|
|
|
-        ENDM
|
|
|
-        ; Word 47, then flush the last result.
|
|
|
-        mov     r10, QWORD PTR [rdx+376]
|
|
|
-        mov     QWORD PTR [rcx+368], r9
|
|
|
-        sbb     r10, QWORD PTR [r8+376]
|
|
|
-        mov     QWORD PTR [rcx+376], r10
|
|
|
-        ; rax = 0 - CF: the final borrow as 0 or -1.
|
|
|
-        sbb     rax, rax
|
|
|
-        ret
|
|
|
-sp_3072_sub_48 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision digit.
|
|
|
-; *
|
|
|
-; * Register arguments on entry: rcx = r, rdx = a, r8 = b.
|
|
|
-; * 48 words of a are read and 49 words of r are written.
|
|
|
-; * Uses mulx with two independent carry chains: adcx (CF) adds the low
|
|
|
-; * half of each product, adox (OF) adds the high half.
|
|
|
-; * r12 and r13 are saved and restored; rax, rdx, r9, r10 and r11 are
|
|
|
-; * overwritten.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mul_d_avx2_48 PROC
|
|
|
-        push    r12
|
|
|
-        push    r13
|
|
|
-        ; mulx takes one factor implicitly from rdx, so a moves to rax
|
|
|
-        ; and the digit b goes into rdx.
|
|
|
-        mov     rax, rdx
|
|
|
-        ; A[0] * B
|
|
|
-        mov     rdx, r8
|
|
|
-        ; r13 stays zero for the whole routine; the xor also clears CF
|
|
|
-        ; and OF so both carry chains start empty.
|
|
|
-        xor     r13, r13
|
|
|
-        mulx    r12, r11, QWORD PTR [rax]
|
|
|
-        mov     QWORD PTR [rcx], r11
|
|
|
-        ; A[1] * B .. A[46] * B, two words per repetition. The running
|
|
|
-        ; word alternates between r12 and r11; the other register is
|
|
|
-        ; reset from r13 with mov so that neither flag is disturbed.
|
|
|
-L_3072_mul_d_avx2_48_off = 8
|
|
|
-        REPT    23
|
|
|
-        mulx    r10, r9, QWORD PTR [rax+L_3072_mul_d_avx2_48_off]
|
|
|
-        mov     r11, r13
|
|
|
-        adcx    r12, r9
|
|
|
-        adox    r11, r10
|
|
|
-        mov     QWORD PTR [rcx+L_3072_mul_d_avx2_48_off], r12
|
|
|
-        mulx    r10, r9, QWORD PTR [rax+L_3072_mul_d_avx2_48_off+8]
|
|
|
-        mov     r12, r13
|
|
|
-        adcx    r11, r9
|
|
|
-        adox    r12, r10
|
|
|
-        mov     QWORD PTR [rcx+L_3072_mul_d_avx2_48_off+8], r11
|
|
|
-L_3072_mul_d_avx2_48_off = L_3072_mul_d_avx2_48_off + 16
|
|
|
-        ENDM
|
|
|
-        ; A[47] * B
|
|
|
-        mulx    r10, r9, QWORD PTR [rax+376]
|
|
|
-        mov     r11, r13
|
|
|
-        adcx    r12, r9
|
|
|
-        adox    r11, r10
|
|
|
-        ; Fold the last CF carry into the top word.
|
|
|
-        adcx    r11, r13
|
|
|
-        mov     QWORD PTR [rcx+376], r12
|
|
|
-        mov     QWORD PTR [rcx+384], r11
|
|
|
-        pop     r13
|
|
|
-        pop     r12
|
|
|
-        ret
|
|
|
-sp_3072_mul_d_avx2_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF _WIN64
|
|
|
-; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
|
|
|
-; *
|
|
|
-; * d1 The high order half of the number to divide.
|
|
|
-; * d0 The low order half of the number to divide.
|
|
|
-; * div The divisor.
|
|
|
-; * returns the quotient of the division (the remainder is discarded).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-div_3072_word_asm_48 PROC
|
|
|
- mov r9, rdx ; r9 = d0 (second argument)
|
|
|
- mov rax, r9 ; rax = d0, the low half of the dividend
|
|
|
- mov rdx, rcx ; rdx = d1 (first argument), the high half of the dividend
|
|
|
- div r8 ; unsigned rdx:rax / div; faults (#DE) if div is 0 or d1 >= div, no check is made here
|
|
|
- ret ; quotient is returned in rax; the remainder left in rdx is ignored
|
|
|
-div_3072_word_asm_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m (48 words of 64 bits each).
|
|
|
-; * m is -1 to subtract b and 0 to subtract nothing (r = a); the final borrow is returned as 0 or -1.
|
|
|
-; *
|
|
|
-; * r A single precision number representing condition subtract result.
|
|
|
-; * a A single precision number to subtract from.
|
|
|
-; * b A single precision number to subtract.
|
|
|
-; * m Mask value to apply (rcx = r, rdx = a, r8 = b, r9 = m in the code below).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_cond_sub_avx2_48 PROC
|
|
|
- push r12 ; r12 is callee-saved; it serves as a third scratch register
|
|
|
- mov r12, QWORD PTR [r8] ; b[0]
|
|
|
- mov r10, QWORD PTR [rdx] ; a[0]
|
|
|
- pext r12, r12, r9 ; b[0] when m is all ones, 0 when m is zero
|
|
|
- sub r10, r12 ; a[0] - masked b[0]; starts the borrow chain in CF
|
|
|
- mov r12, QWORD PTR [r8+8] ; b[1]
|
|
|
- mov r11, QWORD PTR [rdx+8] ; a[1]
|
|
|
- pext r12, r12, r9 ; mask b[1]; mov and pext leave CF untouched
|
|
|
- mov QWORD PTR [rcx], r10 ; r[0] = difference computed for the previous word
|
|
|
- sbb r11, r12 ; a[1] - masked b[1] - borrow
|
|
|
- mov r10, QWORD PTR [r8+16] ; b[2]; the same five steps repeat, rotating r10/r11/r12
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+24] ; b[3]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+32] ; b[4]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+40] ; b[5]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+48] ; b[6]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+56] ; b[7]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+64] ; b[8]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+72] ; b[9]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+80] ; b[10]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+88] ; b[11]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+96] ; b[12]
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+104] ; b[13]
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+112] ; b[14]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+120] ; b[15]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+128] ; b[16]
|
|
|
- mov r11, QWORD PTR [rdx+128]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+136] ; b[17]
|
|
|
- mov r12, QWORD PTR [rdx+136]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+144] ; b[18]
|
|
|
- mov r10, QWORD PTR [rdx+144]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+152] ; b[19]
|
|
|
- mov r11, QWORD PTR [rdx+152]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+160] ; b[20]
|
|
|
- mov r12, QWORD PTR [rdx+160]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+168] ; b[21]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+176] ; b[22]
|
|
|
- mov r11, QWORD PTR [rdx+176]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+184] ; b[23]
|
|
|
- mov r12, QWORD PTR [rdx+184]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+192] ; b[24]
|
|
|
- mov r10, QWORD PTR [rdx+192]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+200] ; b[25]
|
|
|
- mov r11, QWORD PTR [rdx+200]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+208] ; b[26]
|
|
|
- mov r12, QWORD PTR [rdx+208]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+216] ; b[27]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+224] ; b[28]
|
|
|
- mov r11, QWORD PTR [rdx+224]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+232] ; b[29]
|
|
|
- mov r12, QWORD PTR [rdx+232]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+240] ; b[30]
|
|
|
- mov r10, QWORD PTR [rdx+240]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+248] ; b[31]
|
|
|
- mov r11, QWORD PTR [rdx+248]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+256] ; b[32]
|
|
|
- mov r12, QWORD PTR [rdx+256]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+264] ; b[33]
|
|
|
- mov r10, QWORD PTR [rdx+264]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+256], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+272] ; b[34]
|
|
|
- mov r11, QWORD PTR [rdx+272]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+264], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+280] ; b[35]
|
|
|
- mov r12, QWORD PTR [rdx+280]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+272], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+288] ; b[36]
|
|
|
- mov r10, QWORD PTR [rdx+288]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+280], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+296] ; b[37]
|
|
|
- mov r11, QWORD PTR [rdx+296]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+288], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+304] ; b[38]
|
|
|
- mov r12, QWORD PTR [rdx+304]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+296], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+312] ; b[39]
|
|
|
- mov r10, QWORD PTR [rdx+312]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+304], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+320] ; b[40]
|
|
|
- mov r11, QWORD PTR [rdx+320]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+312], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+328] ; b[41]
|
|
|
- mov r12, QWORD PTR [rdx+328]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+320], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+336] ; b[42]
|
|
|
- mov r10, QWORD PTR [rdx+336]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+328], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+344] ; b[43]
|
|
|
- mov r11, QWORD PTR [rdx+344]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+336], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+352] ; b[44]
|
|
|
- mov r12, QWORD PTR [rdx+352]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+344], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+360] ; b[45]
|
|
|
- mov r10, QWORD PTR [rdx+360]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+352], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+368] ; b[46]
|
|
|
- mov r11, QWORD PTR [rdx+368]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+360], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+376] ; b[47], the most significant word
|
|
|
- mov r12, QWORD PTR [rdx+376]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+368], r11
|
|
|
- sbb r12, r10
|
|
|
- mov QWORD PTR [rcx+376], r12 ; r[47]: the last difference has no following group to store it
|
|
|
- sbb rax, rax ; return value: rax = 0 - CF, i.e. 0, or -1 if the last sbb borrowed
|
|
|
- pop r12 ; restore the caller's r12
|
|
|
- ret
|
|
|
-sp_3072_cond_sub_avx2_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Compare the 48-word numbers a and b in constant time (no branches, every word is always read).
|
|
|
-; *
|
|
|
-; * a A single precision integer (pointer in rcx in the code below).
|
|
|
-; * b A single precision integer (pointer in rdx in the code below).
|
|
|
-; * return -ve, 0 or +ve if a is less than, equal to or greater than b
|
|
|
-; * respectively (the values actually produced are -1, 0 and 1).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_cmp_48 PROC
|
|
|
- push r12 ; r12 is callee-saved; used below as scratch
|
|
|
- xor r9, r9 ; r9 = 0, the value used to clear the mask
|
|
|
- mov r8, -1 ; r8 = mask: all ones until a differing word has been seen
|
|
|
- mov rax, -1 ; result; starts at -1 so the final xor with an intact mask gives 0
|
|
|
- mov r10, 1 ; r10 = 1, the result for a greater than b
|
|
|
- mov r11, QWORD PTR [rcx+376] ; a[47]: words are compared from most to least significant
|
|
|
- mov r12, QWORD PTR [rdx+376] ; b[47]
|
|
|
- and r11, r8 ; once the mask is 0 both words read as 0,
|
|
|
- and r12, r8 ; so lower words can no longer change the result
|
|
|
- sub r11, r12 ; sets the flags consumed by the three cmovs below
|
|
|
- cmova rax, r10 ; a[i] above b[i] (unsigned): result = 1
|
|
|
- cmovc rax, r8 ; a[i] below b[i]: result = mask, which is still -1 at this point
|
|
|
- cmovnz r8, r9 ; words differ: clear the mask to freeze the result
|
|
|
- mov r11, QWORD PTR [rcx+368] ; a[46]; the same eight steps repeat for every lower word
|
|
|
- mov r12, QWORD PTR [rdx+368]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+360] ; a[45]
|
|
|
- mov r12, QWORD PTR [rdx+360]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+352] ; a[44]
|
|
|
- mov r12, QWORD PTR [rdx+352]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+344] ; a[43]
|
|
|
- mov r12, QWORD PTR [rdx+344]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+336] ; a[42]
|
|
|
- mov r12, QWORD PTR [rdx+336]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+328] ; a[41]
|
|
|
- mov r12, QWORD PTR [rdx+328]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+320] ; a[40]
|
|
|
- mov r12, QWORD PTR [rdx+320]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+312] ; a[39]
|
|
|
- mov r12, QWORD PTR [rdx+312]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+304] ; a[38]
|
|
|
- mov r12, QWORD PTR [rdx+304]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+296] ; a[37]
|
|
|
- mov r12, QWORD PTR [rdx+296]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+288] ; a[36]
|
|
|
- mov r12, QWORD PTR [rdx+288]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+280] ; a[35]
|
|
|
- mov r12, QWORD PTR [rdx+280]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+272] ; a[34]
|
|
|
- mov r12, QWORD PTR [rdx+272]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+264] ; a[33]
|
|
|
- mov r12, QWORD PTR [rdx+264]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+256] ; a[32]
|
|
|
- mov r12, QWORD PTR [rdx+256]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+248] ; a[31]
|
|
|
- mov r12, QWORD PTR [rdx+248]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+240] ; a[30]
|
|
|
- mov r12, QWORD PTR [rdx+240]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+232] ; a[29]
|
|
|
- mov r12, QWORD PTR [rdx+232]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+224] ; a[28]
|
|
|
- mov r12, QWORD PTR [rdx+224]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+216] ; a[27]
|
|
|
- mov r12, QWORD PTR [rdx+216]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+208] ; a[26]
|
|
|
- mov r12, QWORD PTR [rdx+208]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+200] ; a[25]
|
|
|
- mov r12, QWORD PTR [rdx+200]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+192] ; a[24]
|
|
|
- mov r12, QWORD PTR [rdx+192]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+184] ; a[23]
|
|
|
- mov r12, QWORD PTR [rdx+184]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+176] ; a[22]
|
|
|
- mov r12, QWORD PTR [rdx+176]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+168] ; a[21]
|
|
|
- mov r12, QWORD PTR [rdx+168]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+160] ; a[20]
|
|
|
- mov r12, QWORD PTR [rdx+160]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+152] ; a[19]
|
|
|
- mov r12, QWORD PTR [rdx+152]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+144] ; a[18]
|
|
|
- mov r12, QWORD PTR [rdx+144]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+136] ; a[17]
|
|
|
- mov r12, QWORD PTR [rdx+136]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+128] ; a[16]
|
|
|
- mov r12, QWORD PTR [rdx+128]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+120] ; a[15]
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+112] ; a[14]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+104] ; a[13]
|
|
|
- mov r12, QWORD PTR [rdx+104]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+96] ; a[12]
|
|
|
- mov r12, QWORD PTR [rdx+96]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+88] ; a[11]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+80] ; a[10]
|
|
|
- mov r12, QWORD PTR [rdx+80]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+72] ; a[9]
|
|
|
- mov r12, QWORD PTR [rdx+72]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+64] ; a[8]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+56] ; a[7]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+48] ; a[6]
|
|
|
- mov r12, QWORD PTR [rdx+48]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+40] ; a[5]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+32] ; a[4]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+24] ; a[3]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+16] ; a[2]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+8] ; a[1]
|
|
|
- mov r12, QWORD PTR [rdx+8]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx] ; a[0], the least significant word
|
|
|
- mov r12, QWORD PTR [rdx]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- xor rax, r8 ; all words equal: -1 xor -1 = 0; otherwise the mask is 0 and rax is kept
|
|
|
- pop r12 ; restore the caller's r12
|
|
|
- ret
|
|
|
-sp_3072_cmp_48 ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_get_from_table_48 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- pxor xmm13, xmm13
|
|
|
- pshufd xmm11, xmm11, 0
|
|
|
- pshufd xmm10, xmm10, 0
|
|
|
- ; START: 0-7
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 0-7
|
|
|
- ; START: 8-15
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 8-15
|
|
|
- ; START: 16-23
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 16-23
|
|
|
- ; START: 24-31
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 24-31
|
|
|
- ; START: 32-39
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 32-39
|
|
|
- ; START: 40-47
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- ; END: 40-47
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_3072_get_from_table_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 3072 bits using Montgomery reduction.
|
|
|
-; * Arguments as consumed below: rcx = a, rdx = m, r8 = mp.
|
|
|
-; * a A single precision number to reduce in place. Words a[0..95] are accessed; the result is stored in a[0..47].
|
|
|
-; * m The single precision number representing the modulus. Words m[0..47] are read.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_mont_reduce_avx2_48 PROC
|
|
|
- push r12 ; r12-r15, rdi, rsi, rbx and rbp are saved here and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov r9, rcx ; r9 = a
|
|
|
- mov r10, rdx ; r10 = m (rdx is reused below as the implicit mulx multiplicand)
|
|
|
- xor rbp, rbp ; rbp = carry word handed from one loop iteration to the next
|
|
|
- ; i = 48
|
|
|
- mov r11, 48
|
|
|
- mov r14, QWORD PTR [r9] ; r14, r15, rdi, rsi = a[0..3], kept in registers across the loop
|
|
|
- mov r15, QWORD PTR [r9+8]
|
|
|
- mov rdi, QWORD PTR [r9+16]
|
|
|
- mov rsi, QWORD PTR [r9+24]
|
|
|
- add r9, 192 ; r9 = &a[24]; the displacements in the loop are relative to this
|
|
|
- xor rbp, rbp ; rbp is already zero from the xor above
|
|
|
-L_3072_mont_reduce_avx2_48_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14 ; r14 = a[i] on loop entry
|
|
|
- mov r12, r14 ; copy of a[i]; its sum with the low product word is never stored, only its carry is used
|
|
|
- imul rdx, r8 ; rdx = low 64 bits of a[i] * mp
|
|
|
- xor rbx, rbx ; rbx = 0; xor also clears CF and OF, the carry flags of the adcx/adox chains
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10] ; rcx:rax = rdx * m[0] (rcx = high word, rax = low word); flags untouched
|
|
|
- mov r14, r15 ; shift the register window: r14 now tracks a[i+1]
|
|
|
- adcx r12, rax ; low product word is accumulated on the CF chain
|
|
|
- adox r14, rcx ; high product word is accumulated into the next word on the OF chain
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-160] ; a[i+4]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-152] ; a[i+5]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-144]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-152], r13 ; from here on each finished word is written back to memory
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+-136]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-144], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+-128]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-136], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+-120]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-128], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+-112]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-120], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+-104]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-112], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9+-96]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-104], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+-88]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-96], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+-80]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-88], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+-72]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-80], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9+-64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-72], r13
|
|
|
- ; a[i+16] += m[16] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+128]
|
|
|
- mov r13, QWORD PTR [r9+-56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-64], r12
|
|
|
- ; a[i+17] += m[17] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+136]
|
|
|
- mov r12, QWORD PTR [r9+-48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-56], r13
|
|
|
- ; a[i+18] += m[18] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+144]
|
|
|
- mov r13, QWORD PTR [r9+-40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-48], r12
|
|
|
- ; a[i+19] += m[19] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+152]
|
|
|
- mov r12, QWORD PTR [r9+-32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-40], r13
|
|
|
- ; a[i+20] += m[20] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+160]
|
|
|
- mov r13, QWORD PTR [r9+-24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-32], r12
|
|
|
- ; a[i+21] += m[21] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+168]
|
|
|
- mov r12, QWORD PTR [r9+-16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-24], r13
|
|
|
- ; a[i+22] += m[22] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+176]
|
|
|
- mov r13, QWORD PTR [r9+-8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-16], r12
|
|
|
- ; a[i+23] += m[23] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+184]
|
|
|
- mov r12, QWORD PTR [r9]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-8], r13
|
|
|
- ; a[i+24] += m[24] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+192]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9], r12
|
|
|
- ; a[i+25] += m[25] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+200]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+26] += m[26] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+208]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+27] += m[27] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+216]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+28] += m[28] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+224]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- ; a[i+29] += m[29] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+232]
|
|
|
- mov r12, QWORD PTR [r9+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- ; a[i+30] += m[30] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+240]
|
|
|
- mov r13, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+48], r12
|
|
|
- ; a[i+31] += m[31] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+248]
|
|
|
- mov r12, QWORD PTR [r9+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+56], r13
|
|
|
- ; a[i+32] += m[32] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+256]
|
|
|
- mov r13, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+64], r12
|
|
|
- ; a[i+33] += m[33] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+264]
|
|
|
- mov r12, QWORD PTR [r9+80]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+72], r13
|
|
|
- ; a[i+34] += m[34] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+272]
|
|
|
- mov r13, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+80], r12
|
|
|
- ; a[i+35] += m[35] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+280]
|
|
|
- mov r12, QWORD PTR [r9+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+88], r13
|
|
|
- ; a[i+36] += m[36] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+288]
|
|
|
- mov r13, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+96], r12
|
|
|
- ; a[i+37] += m[37] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+296]
|
|
|
- mov r12, QWORD PTR [r9+112]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+104], r13
|
|
|
- ; a[i+38] += m[38] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+304]
|
|
|
- mov r13, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+112], r12
|
|
|
- ; a[i+39] += m[39] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+312]
|
|
|
- mov r12, QWORD PTR [r9+128]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+120], r13
|
|
|
- ; a[i+40] += m[40] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+320]
|
|
|
- mov r13, QWORD PTR [r9+136]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+128], r12
|
|
|
- ; a[i+41] += m[41] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+328]
|
|
|
- mov r12, QWORD PTR [r9+144]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+136], r13
|
|
|
- ; a[i+42] += m[42] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+336]
|
|
|
- mov r13, QWORD PTR [r9+152]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+144], r12
|
|
|
- ; a[i+43] += m[43] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+344]
|
|
|
- mov r12, QWORD PTR [r9+160]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+152], r13
|
|
|
- ; a[i+44] += m[44] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+352]
|
|
|
- mov r13, QWORD PTR [r9+168]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+160], r12
|
|
|
- ; a[i+45] += m[45] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+360]
|
|
|
- mov r12, QWORD PTR [r9+176]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+168], r13
|
|
|
- ; a[i+46] += m[46] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+368]
|
|
|
- mov r13, QWORD PTR [r9+184]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+176], r12
|
|
|
- ; a[i+47] += m[47] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+376]
|
|
|
- mov r12, QWORD PTR [r9+192] ; a[i+48], the word just above this iteration's 48-word window
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+184], r13
|
|
|
- adcx r12, rbp ; add the carry word saved by the previous iteration (plus the pending CF)
|
|
|
- mov rbp, rbx ; rbp = 0 (rbx was zeroed at the top of the iteration)
|
|
|
- mov QWORD PTR [r9+192], r12
|
|
|
- adox rbp, rbx ; rbp += OF
|
|
|
- adcx rbp, rbx ; rbp += CF; rbp is the carry word for the next iteration
|
|
|
- ; a += 1
|
|
|
- add r9, 8
|
|
|
- ; i -= 1
|
|
|
- sub r11, 1
|
|
|
- jnz L_3072_mont_reduce_avx2_48_loop
|
|
|
- sub r9, 192 ; remove the +192 bias applied before the loop
|
|
|
- neg rbp ; rbp = 0 - carry: used as the pext mask (0 stays 0, 1 becomes all ones)
|
|
|
- mov r8, r9 ; r8 = a + 384 bytes = &a[48]
|
|
|
- sub r9, 384 ; r9 = &a[0], where the result is stored
|
|
|
- mov rcx, QWORD PTR [r10]
|
|
|
- mov rdx, r14 ; a[48] (left in r14 by the last loop iteration)
|
|
|
- pext rcx, rcx, rbp ; all-ones mask keeps m[0], zero mask yields 0
|
|
|
- sub rdx, rcx ; starts the borrow chain; the mov/pext instructions in between leave CF intact
|
|
|
- mov rcx, QWORD PTR [r10+8]
|
|
|
- mov rax, r15
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9], rdx
|
|
|
- sbb rax, rcx ; word j: a[48+j] - (m[j] under mask) - borrow
|
|
|
- mov rdx, QWORD PTR [r10+16]
|
|
|
- mov rcx, rdi
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+8], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov rdx, rsi
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+16], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [r8+32] ; from word 4 on the upper half is read from memory
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+24], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+40]
|
|
|
- mov rcx, QWORD PTR [r8+40]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+32], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- mov rdx, QWORD PTR [r8+48]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+40], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+56]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+48], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+64]
|
|
|
- mov rcx, QWORD PTR [r8+64]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+56], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov rdx, QWORD PTR [r8+72]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+64], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+72], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+88]
|
|
|
- mov rcx, QWORD PTR [r8+88]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+80], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- mov rdx, QWORD PTR [r8+96]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+88], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+104]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+96], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+112]
|
|
|
- mov rcx, QWORD PTR [r8+112]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+104], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov rdx, QWORD PTR [r8+120]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+112], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+128]
|
|
|
- mov rax, QWORD PTR [r8+128]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+120], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+136]
|
|
|
- mov rcx, QWORD PTR [r8+136]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+128], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+144]
|
|
|
- mov rdx, QWORD PTR [r8+144]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+136], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+152]
|
|
|
- mov rax, QWORD PTR [r8+152]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+144], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+160]
|
|
|
- mov rcx, QWORD PTR [r8+160]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+152], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov rdx, QWORD PTR [r8+168]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+160], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+176]
|
|
|
- mov rax, QWORD PTR [r8+176]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+168], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+184]
|
|
|
- mov rcx, QWORD PTR [r8+184]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+176], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+192]
|
|
|
- mov rdx, QWORD PTR [r8+192]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+184], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+200]
|
|
|
- mov rax, QWORD PTR [r8+200]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+192], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+208]
|
|
|
- mov rcx, QWORD PTR [r8+208]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+200], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+216]
|
|
|
- mov rdx, QWORD PTR [r8+216]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+208], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+224]
|
|
|
- mov rax, QWORD PTR [r8+224]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+216], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+232]
|
|
|
- mov rcx, QWORD PTR [r8+232]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+224], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+240]
|
|
|
- mov rdx, QWORD PTR [r8+240]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+232], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+248]
|
|
|
- mov rax, QWORD PTR [r8+248]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+240], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+256]
|
|
|
- mov rcx, QWORD PTR [r8+256]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+248], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+264]
|
|
|
- mov rdx, QWORD PTR [r8+264]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+256], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+272]
|
|
|
- mov rax, QWORD PTR [r8+272]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+264], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+280]
|
|
|
- mov rcx, QWORD PTR [r8+280]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+272], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+288]
|
|
|
- mov rdx, QWORD PTR [r8+288]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+280], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+296]
|
|
|
- mov rax, QWORD PTR [r8+296]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+288], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+304]
|
|
|
- mov rcx, QWORD PTR [r8+304]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+296], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+312]
|
|
|
- mov rdx, QWORD PTR [r8+312]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+304], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+320]
|
|
|
- mov rax, QWORD PTR [r8+320]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+312], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+328]
|
|
|
- mov rcx, QWORD PTR [r8+328]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+320], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+336]
|
|
|
- mov rdx, QWORD PTR [r8+336]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+328], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+344]
|
|
|
- mov rax, QWORD PTR [r8+344]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+336], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+352]
|
|
|
- mov rcx, QWORD PTR [r8+352]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+344], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+360]
|
|
|
- mov rdx, QWORD PTR [r8+360]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+352], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+368]
|
|
|
- mov rax, QWORD PTR [r8+368]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+360], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+376]
|
|
|
- mov rcx, QWORD PTR [r8+376]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+368], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov QWORD PTR [r9+376], rcx ; last result word; the final borrow is not used
|
|
|
- pop rbp ; restore the saved registers in reverse push order
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_3072_mont_reduce_avx2_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_3072_get_from_table_avx2_48 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpermd ymm10, ymm13, ymm10
|
|
|
- vpermd ymm11, ymm13, ymm11
|
|
|
- ; START: 0-15
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- add rcx, 128
|
|
|
- ; END: 0-15
|
|
|
- ; START: 16-31
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- add rcx, 128
|
|
|
- ; END: 16-31
|
|
|
- ; START: 32-47
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- ; END: 32-47
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_3072_get_from_table_avx2_48 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * r   A single precision number representing conditional add result.
; * a   A single precision number to add with.
; * b   A single precision number to add.
; * m   Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Returns the carry out of the top word (0 or 1) in rax.
; * Uses 192 bytes of stack to hold the 24 words of (b & m).
; * Clobbers r8, r10, r11 and the flags.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_add_24 PROC
        sub     rsp, 192
        mov     rax, 0

        ; Pass 1: tmp[i] = b[i] & m for all 24 words, two words per step.
sp_3072_cond_add_24_off = 0
        REPT    12
        mov     r10, QWORD PTR [r8+sp_3072_cond_add_24_off]
        mov     r11, QWORD PTR [r8+sp_3072_cond_add_24_off+8]
        and     r10, r9
        and     r11, r9
        mov     QWORD PTR [rsp+sp_3072_cond_add_24_off], r10
        mov     QWORD PTR [rsp+sp_3072_cond_add_24_off+8], r11
sp_3072_cond_add_24_off = sp_3072_cond_add_24_off + 16
        ENDM

        ; Pass 2: r = a + tmp. b is no longer needed, so r8 becomes scratch.
        ; Only mov sits between the add/adc instructions, so the carry flag
        ; survives from one word to the next.

        ; Word 0 starts the carry chain.
        mov     r10, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rsp]
        add     r10, r8

        ; Words 1..22, two per step. Each sum is stored one word late so
        ; the store overlaps the next addition; r10 and r11 alternate as
        ; the accumulator.
sp_3072_cond_add_24_off = 8
        REPT    11
        mov     r11, QWORD PTR [rdx+sp_3072_cond_add_24_off]
        mov     r8, QWORD PTR [rsp+sp_3072_cond_add_24_off]
        adc     r11, r8
        mov     QWORD PTR [rcx+sp_3072_cond_add_24_off-8], r10
        mov     r10, QWORD PTR [rdx+sp_3072_cond_add_24_off+8]
        mov     r8, QWORD PTR [rsp+sp_3072_cond_add_24_off+8]
        adc     r10, r8
        mov     QWORD PTR [rcx+sp_3072_cond_add_24_off], r11
sp_3072_cond_add_24_off = sp_3072_cond_add_24_off + 16
        ENDM

        ; Word 23, then flush the last two pending sums.
        mov     r11, QWORD PTR [rdx+184]
        mov     r8, QWORD PTR [rsp+184]
        adc     r11, r8
        mov     QWORD PTR [rcx+176], r10
        mov     QWORD PTR [rcx+184], r11

        ; rax was zeroed on entry, so this leaves the final carry in rax.
        adc     rax, 0
        add     rsp, 192
        ret
sp_3072_cond_add_24 ENDP
_text ENDS
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Conditionally add a and b using the mask m.
; * m is -1 to add and 0 when not.
; *
; * r   A single precision number representing conditional add result.
; * a   A single precision number to add with.
; * b   A single precision number to add.
; * m   Mask value to apply.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = b, r9 = m.
; * Returns the carry out of the top word (0 or 1) in rax.
; * r12 is saved and restored; r10, r11 and the flags are clobbered.
; *
; * Each word of b is masked with pext. For the two documented values of m
; * this equals b & m: m = -1 leaves the word unchanged, m = 0 gives 0.
; * Other mask values would not behave like a plain AND.
; */
_text SEGMENT READONLY PARA
sp_3072_cond_add_avx2_24 PROC
        push    r12
        mov     rax, 0

        ; Word 0 starts the carry chain. pext and mov leave the carry flag
        ; alone, so it survives between the add/adc instructions below.
        mov     r12, QWORD PTR [r8]
        mov     r10, QWORD PTR [rdx]
        pext    r12, r12, r9
        add     r10, r12

        ; Words 1..21, three per step. r10, r11 and r12 rotate through the
        ; roles "masked b word", "a word / sum" and "previous sum awaiting
        ; store"; each sum is stored one word late.
sp_3072_cond_add_avx2_24_off = 8
        REPT    7
        mov     r12, QWORD PTR [r8+sp_3072_cond_add_avx2_24_off]
        mov     r11, QWORD PTR [rdx+sp_3072_cond_add_avx2_24_off]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+sp_3072_cond_add_avx2_24_off-8], r10
        adc     r11, r12

        mov     r10, QWORD PTR [r8+sp_3072_cond_add_avx2_24_off+8]
        mov     r12, QWORD PTR [rdx+sp_3072_cond_add_avx2_24_off+8]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+sp_3072_cond_add_avx2_24_off], r11
        adc     r12, r10

        mov     r11, QWORD PTR [r8+sp_3072_cond_add_avx2_24_off+16]
        mov     r10, QWORD PTR [rdx+sp_3072_cond_add_avx2_24_off+16]
        pext    r11, r11, r9
        mov     QWORD PTR [rcx+sp_3072_cond_add_avx2_24_off+8], r12
        adc     r10, r11
sp_3072_cond_add_avx2_24_off = sp_3072_cond_add_avx2_24_off + 24
        ENDM

        ; Word 22.
        mov     r12, QWORD PTR [r8+176]
        mov     r11, QWORD PTR [rdx+176]
        pext    r12, r12, r9
        mov     QWORD PTR [rcx+168], r10
        adc     r11, r12

        ; Word 23, then flush the last pending sum.
        mov     r10, QWORD PTR [r8+184]
        mov     r12, QWORD PTR [rdx+184]
        pext    r10, r10, r9
        mov     QWORD PTR [rcx+176], r11
        adc     r12, r10
        mov     QWORD PTR [rcx+184], r12

        ; rax was zeroed on entry, so this leaves the final carry in rax.
        adc     rax, 0
        pop     r12
        ret
sp_3072_cond_add_avx2_24 ENDP
_text ENDS
ENDIF
|
|
|
; /* Shift number left by n bits. (r = a << n)
; *
; * r   Result of left shift by n. 49 words are written: r[48] receives
; *     the bits shifted out of the top word of a.
; * a   Number to shift (48 words are read).
; * n   Amount to shift. The count is applied through cl, and shld/shl
; *     use only its low 6 bits.
; *
; * Registers on entry: rcx = r, rdx = a, r8 = n.
; * r12 and r13 are saved and restored; rax, rcx, r8-r11 and the flags
; * are clobbered.
; */
_text SEGMENT READONLY PARA
sp_3072_lshift_48 PROC
        push    r12
        push    r13
        ; The shift count must live in cl, but rcx carries r on entry.
        ; Copy r to rax BEFORE loading cl; doing it the other way round
        ; replaces the low byte of the result pointer with n.
        mov     rax, rcx
        mov     cl, r8b
        mov     r12, 0

        ; Words are processed from the top down. Each group loads one extra
        ; low word (alternately into r13 and r11) that supplies the bits
        ; shifted into the group's lowest output word; that register then
        ; becomes the highest word of the next group.

        ; a[43..47] -> r[44..48]
        mov     r13, QWORD PTR [rdx+344]
        mov     r8, QWORD PTR [rdx+352]
        mov     r9, QWORD PTR [rdx+360]
        mov     r10, QWORD PTR [rdx+368]
        mov     r11, QWORD PTR [rdx+376]
        shld    r12, r11, cl
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+352], r8
        mov     QWORD PTR [rax+360], r9
        mov     QWORD PTR [rax+368], r10
        mov     QWORD PTR [rax+376], r11
        mov     QWORD PTR [rax+384], r12

        ; a[39..43] -> r[40..43]
        mov     r11, QWORD PTR [rdx+312]
        mov     r8, QWORD PTR [rdx+320]
        mov     r9, QWORD PTR [rdx+328]
        mov     r10, QWORD PTR [rdx+336]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+320], r8
        mov     QWORD PTR [rax+328], r9
        mov     QWORD PTR [rax+336], r10
        mov     QWORD PTR [rax+344], r13

        ; a[35..39] -> r[36..39]
        mov     r13, QWORD PTR [rdx+280]
        mov     r8, QWORD PTR [rdx+288]
        mov     r9, QWORD PTR [rdx+296]
        mov     r10, QWORD PTR [rdx+304]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+288], r8
        mov     QWORD PTR [rax+296], r9
        mov     QWORD PTR [rax+304], r10
        mov     QWORD PTR [rax+312], r11

        ; a[31..35] -> r[32..35]
        mov     r11, QWORD PTR [rdx+248]
        mov     r8, QWORD PTR [rdx+256]
        mov     r9, QWORD PTR [rdx+264]
        mov     r10, QWORD PTR [rdx+272]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+256], r8
        mov     QWORD PTR [rax+264], r9
        mov     QWORD PTR [rax+272], r10
        mov     QWORD PTR [rax+280], r13

        ; a[27..31] -> r[28..31]
        mov     r13, QWORD PTR [rdx+216]
        mov     r8, QWORD PTR [rdx+224]
        mov     r9, QWORD PTR [rdx+232]
        mov     r10, QWORD PTR [rdx+240]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+224], r8
        mov     QWORD PTR [rax+232], r9
        mov     QWORD PTR [rax+240], r10
        mov     QWORD PTR [rax+248], r11

        ; a[23..27] -> r[24..27]
        mov     r11, QWORD PTR [rdx+184]
        mov     r8, QWORD PTR [rdx+192]
        mov     r9, QWORD PTR [rdx+200]
        mov     r10, QWORD PTR [rdx+208]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+192], r8
        mov     QWORD PTR [rax+200], r9
        mov     QWORD PTR [rax+208], r10
        mov     QWORD PTR [rax+216], r13

        ; a[19..23] -> r[20..23]
        mov     r13, QWORD PTR [rdx+152]
        mov     r8, QWORD PTR [rdx+160]
        mov     r9, QWORD PTR [rdx+168]
        mov     r10, QWORD PTR [rdx+176]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+160], r8
        mov     QWORD PTR [rax+168], r9
        mov     QWORD PTR [rax+176], r10
        mov     QWORD PTR [rax+184], r11

        ; a[15..19] -> r[16..19]
        mov     r11, QWORD PTR [rdx+120]
        mov     r8, QWORD PTR [rdx+128]
        mov     r9, QWORD PTR [rdx+136]
        mov     r10, QWORD PTR [rdx+144]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+128], r8
        mov     QWORD PTR [rax+136], r9
        mov     QWORD PTR [rax+144], r10
        mov     QWORD PTR [rax+152], r13

        ; a[11..15] -> r[12..15]
        mov     r13, QWORD PTR [rdx+88]
        mov     r8, QWORD PTR [rdx+96]
        mov     r9, QWORD PTR [rdx+104]
        mov     r10, QWORD PTR [rdx+112]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+96], r8
        mov     QWORD PTR [rax+104], r9
        mov     QWORD PTR [rax+112], r10
        mov     QWORD PTR [rax+120], r11

        ; a[7..11] -> r[8..11]
        mov     r11, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [rdx+64]
        mov     r9, QWORD PTR [rdx+72]
        mov     r10, QWORD PTR [rdx+80]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+64], r8
        mov     QWORD PTR [rax+72], r9
        mov     QWORD PTR [rax+80], r10
        mov     QWORD PTR [rax+88], r13

        ; a[3..7] -> r[4..7]
        mov     r13, QWORD PTR [rdx+24]
        mov     r8, QWORD PTR [rdx+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rdx+48]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+32], r8
        mov     QWORD PTR [rax+40], r9
        mov     QWORD PTR [rax+48], r10
        mov     QWORD PTR [rax+56], r11

        ; a[0..3] -> r[0..3]; the lowest word is a plain shl (zeros shift in).
        mov     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shl     r8, cl
        mov     QWORD PTR [rax], r8
        mov     QWORD PTR [rax+8], r9
        mov     QWORD PTR [rax+16], r10
        mov     QWORD PTR [rax+24], r13

        pop     r13
        pop     r12
        ret
sp_3072_lshift_48 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFDEF WOLFSSL_SP_4096
|
|
|
-IFDEF WOLFSSL_SP_4096
|
|
|
-; /* Read a big endian unsigned byte array into r as little endian 64-bit words.
|
|
|
-; * Uses the bswap instruction. Modifies rax, rcx, r8, r9, r10, r11; r12 and r13 are pushed and popped.
|
|
|
-; * After the input is consumed, r is zero filled up to r + 512 bytes.
|
|
|
-; * r A single precision integer. (rcx, destination)
|
|
|
-; * size Maximum number of bytes to convert (rdx; not referenced by this routine, the constant 512 is used instead)
|
|
|
-; * a Byte array. (r8, source)
|
|
|
-; * n Number of bytes in array to read. (r9; NOTE(review): no check here that n <= 512 - presumably enforced by the caller)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_from_bin_bswap PROC
|
|
|
- push r12 ; r12 and r13 are used as scratch below and restored before ret
|
|
|
- push r13
|
|
|
- mov r11, r8
|
|
|
- mov r12, rcx
|
|
|
- add r11, r9 ; r11 = a + n, one past the last (least significant) input byte
|
|
|
- add r12, 512 ; r12 = r + 512, end of the zero filled output
|
|
|
- xor r13, r13 ; r13 = 0, the zero constant for the compare and the fill below
|
|
|
- jmp L_4096_from_bin_bswap_64_end ; enter the 64-byte loop at its condition
|
|
|
-L_4096_from_bin_bswap_64_start:
|
|
|
- sub r11, 64 ; step back over the next 64 input bytes
|
|
|
- mov rax, QWORD PTR [r11+56] ; trailing 8 bytes of the chunk become the lowest output word
|
|
|
- mov r10, QWORD PTR [r11+48]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov rax, QWORD PTR [r11+40]
|
|
|
- mov r10, QWORD PTR [r11+32]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov rax, QWORD PTR [r11+24]
|
|
|
- mov r10, QWORD PTR [r11+16]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov rax, QWORD PTR [r11+8]
|
|
|
- mov r10, QWORD PTR [r11]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- add rcx, 64 ; advance the output pointer by eight words
|
|
|
- sub r9, 64 ; 64 fewer bytes remain
|
|
|
-L_4096_from_bin_bswap_64_end:
|
|
|
- cmp r9, 63
|
|
|
- jg L_4096_from_bin_bswap_64_start ; signed compare: loop while more than 63 bytes remain
|
|
|
- jmp L_4096_from_bin_bswap_8_end ; enter the 8-byte loop at its condition
|
|
|
-L_4096_from_bin_bswap_8_start:
|
|
|
- sub r11, 8
|
|
|
- mov rax, QWORD PTR [r11]
|
|
|
- bswap rax
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- add rcx, 8
|
|
|
- sub r9, 8
|
|
|
-L_4096_from_bin_bswap_8_end:
|
|
|
- cmp r9, 7
|
|
|
- jg L_4096_from_bin_bswap_8_start ; signed compare: loop while more than 7 bytes remain
|
|
|
- cmp r9, r13 ; r13 is zero: are any bytes left over?
|
|
|
- je L_4096_from_bin_bswap_hi_end
|
|
|
- mov r10, r13 ; r10 = 0, accumulator for the partial top word
|
|
|
- mov rax, r13 ; clear rax so only al carries data in the loop
|
|
|
-L_4096_from_bin_bswap_hi_start:
|
|
|
- mov al, BYTE PTR [r8] ; r8 still points at the start of a, the most significant bytes
|
|
|
- shl r10, 8
|
|
|
- inc r8
|
|
|
- add r10, rax ; append the byte below the ones already accumulated
|
|
|
- dec r9
|
|
|
- jg L_4096_from_bin_bswap_hi_start ; repeat until the leftover count reaches zero
|
|
|
- mov QWORD PTR [rcx], r10 ; store the partial most significant word
|
|
|
- add rcx, 8
|
|
|
-L_4096_from_bin_bswap_hi_end:
|
|
|
- cmp rcx, r12
|
|
|
- jge L_4096_from_bin_bswap_zero_end ; nothing to fill when the output pointer has reached r + 512
|
|
|
-L_4096_from_bin_bswap_zero_start:
|
|
|
- mov QWORD PTR [rcx], r13 ; zero the remaining high words
|
|
|
- add rcx, 8
|
|
|
- cmp rcx, r12
|
|
|
- jl L_4096_from_bin_bswap_zero_start
|
|
|
-L_4096_from_bin_bswap_zero_end:
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_from_bin_bswap ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF NO_MOVBE_SUPPORT
|
|
|
-; /* Read a big endian unsigned byte array into r as little endian 64-bit words.
|
|
|
-; * Uses the movbe instruction which is an optional instruction.
|
|
|
-; * Modifies rax, rcx, r8, r9, r10, r11; r12 is pushed and popped. r is zero filled up to r + 512 bytes.
|
|
|
-; * r A single precision integer. (rcx, destination)
|
|
|
-; * size Maximum number of bytes to convert (rdx; not referenced by this routine, the constant 512 is used instead)
|
|
|
-; * a Byte array. (r8, source)
|
|
|
-; * n Number of bytes in array to read. (r9; NOTE(review): no check here that n <= 512 - presumably enforced by the caller)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_from_bin_movbe PROC
|
|
|
- push r12 ; r12 is used as scratch below and restored before ret
|
|
|
- mov r11, r8
|
|
|
- mov r12, rcx
|
|
|
- add r11, r9 ; r11 = a + n, one past the last (least significant) input byte
|
|
|
- add r12, 512 ; r12 = r + 512, end of the zero filled output
|
|
|
- jmp L_4096_from_bin_movbe_64_end ; enter the 64-byte loop at its condition
|
|
|
-L_4096_from_bin_movbe_64_start:
|
|
|
- sub r11, 64 ; step back over the next 64 input bytes
|
|
|
- movbe rax, QWORD PTR [r11+56] ; byte-swapping load: trailing 8 bytes become the lowest output word
|
|
|
- movbe r10, QWORD PTR [r11+48]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- movbe rax, QWORD PTR [r11+40]
|
|
|
- movbe r10, QWORD PTR [r11+32]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- movbe rax, QWORD PTR [r11+24]
|
|
|
- movbe r10, QWORD PTR [r11+16]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- movbe rax, QWORD PTR [r11+8]
|
|
|
- movbe r10, QWORD PTR [r11]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- add rcx, 64 ; advance the output pointer by eight words
|
|
|
- sub r9, 64 ; 64 fewer bytes remain
|
|
|
-L_4096_from_bin_movbe_64_end:
|
|
|
- cmp r9, 63
|
|
|
- jg L_4096_from_bin_movbe_64_start ; signed compare: loop while more than 63 bytes remain
|
|
|
- jmp L_4096_from_bin_movbe_8_end ; enter the 8-byte loop at its condition
|
|
|
-L_4096_from_bin_movbe_8_start:
|
|
|
- sub r11, 8
|
|
|
- movbe rax, QWORD PTR [r11]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- add rcx, 8
|
|
|
- sub r9, 8
|
|
|
-L_4096_from_bin_movbe_8_end:
|
|
|
- cmp r9, 7
|
|
|
- jg L_4096_from_bin_movbe_8_start ; signed compare: loop while more than 7 bytes remain
|
|
|
- cmp r9, 0 ; are any bytes left over?
|
|
|
- je L_4096_from_bin_movbe_hi_end
|
|
|
- mov r10, 0 ; accumulator for the partial top word
|
|
|
- mov rax, 0 ; clear rax so only al carries data in the loop
|
|
|
-L_4096_from_bin_movbe_hi_start:
|
|
|
- mov al, BYTE PTR [r8] ; r8 still points at the start of a, the most significant bytes
|
|
|
- shl r10, 8
|
|
|
- inc r8
|
|
|
- add r10, rax ; append the byte below the ones already accumulated
|
|
|
- dec r9
|
|
|
- jg L_4096_from_bin_movbe_hi_start ; repeat until the leftover count reaches zero
|
|
|
- mov QWORD PTR [rcx], r10 ; store the partial most significant word
|
|
|
- add rcx, 8
|
|
|
-L_4096_from_bin_movbe_hi_end:
|
|
|
- cmp rcx, r12
|
|
|
- jge L_4096_from_bin_movbe_zero_end ; nothing to fill when the output pointer has reached r + 512
|
|
|
-L_4096_from_bin_movbe_zero_start:
|
|
|
- mov QWORD PTR [rcx], 0 ; zero the remaining high words
|
|
|
- add rcx, 8
|
|
|
- cmp rcx, r12
|
|
|
- jl L_4096_from_bin_movbe_zero_start
|
|
|
-L_4096_from_bin_movbe_zero_end:
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_from_bin_movbe ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Write r as big endian to byte array.
|
|
|
-; * Fixed length number of bytes written: 512
|
|
|
-; * Uses the bswap instruction. Fully unrolled; modifies only rax and r8.
|
|
|
-; * Word order is reversed (r word at offset 504 goes to a offset 0) and each word is byte swapped.
|
|
|
-; * r A single precision integer. (rcx, source, 64 words of 8 bytes)
|
|
|
-; * a Byte array. (rdx, destination, 512 bytes)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_to_bin_bswap_64 PROC
|
|
|
- mov rax, QWORD PTR [rcx+504] ; highest word of r
|
|
|
- mov r8, QWORD PTR [rcx+496]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx], rax ; becomes the first 8 output bytes
|
|
|
- mov QWORD PTR [rdx+8], r8
|
|
|
- mov rax, QWORD PTR [rcx+488]
|
|
|
- mov r8, QWORD PTR [rcx+480]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+16], rax
|
|
|
- mov QWORD PTR [rdx+24], r8
|
|
|
- mov rax, QWORD PTR [rcx+472]
|
|
|
- mov r8, QWORD PTR [rcx+464]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+32], rax
|
|
|
- mov QWORD PTR [rdx+40], r8
|
|
|
- mov rax, QWORD PTR [rcx+456]
|
|
|
- mov r8, QWORD PTR [rcx+448]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+48], rax
|
|
|
- mov QWORD PTR [rdx+56], r8
|
|
|
- mov rax, QWORD PTR [rcx+440]
|
|
|
- mov r8, QWORD PTR [rcx+432]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+64], rax
|
|
|
- mov QWORD PTR [rdx+72], r8
|
|
|
- mov rax, QWORD PTR [rcx+424]
|
|
|
- mov r8, QWORD PTR [rcx+416]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+80], rax
|
|
|
- mov QWORD PTR [rdx+88], r8
|
|
|
- mov rax, QWORD PTR [rcx+408]
|
|
|
- mov r8, QWORD PTR [rcx+400]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+96], rax
|
|
|
- mov QWORD PTR [rdx+104], r8
|
|
|
- mov rax, QWORD PTR [rcx+392]
|
|
|
- mov r8, QWORD PTR [rcx+384]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+112], rax
|
|
|
- mov QWORD PTR [rdx+120], r8
|
|
|
- mov rax, QWORD PTR [rcx+376]
|
|
|
- mov r8, QWORD PTR [rcx+368]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+128], rax
|
|
|
- mov QWORD PTR [rdx+136], r8
|
|
|
- mov rax, QWORD PTR [rcx+360]
|
|
|
- mov r8, QWORD PTR [rcx+352]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+144], rax
|
|
|
- mov QWORD PTR [rdx+152], r8
|
|
|
- mov rax, QWORD PTR [rcx+344]
|
|
|
- mov r8, QWORD PTR [rcx+336]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+160], rax
|
|
|
- mov QWORD PTR [rdx+168], r8
|
|
|
- mov rax, QWORD PTR [rcx+328]
|
|
|
- mov r8, QWORD PTR [rcx+320]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+176], rax
|
|
|
- mov QWORD PTR [rdx+184], r8
|
|
|
- mov rax, QWORD PTR [rcx+312]
|
|
|
- mov r8, QWORD PTR [rcx+304]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+192], rax
|
|
|
- mov QWORD PTR [rdx+200], r8
|
|
|
- mov rax, QWORD PTR [rcx+296]
|
|
|
- mov r8, QWORD PTR [rcx+288]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+208], rax
|
|
|
- mov QWORD PTR [rdx+216], r8
|
|
|
- mov rax, QWORD PTR [rcx+280]
|
|
|
- mov r8, QWORD PTR [rcx+272]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+224], rax
|
|
|
- mov QWORD PTR [rdx+232], r8
|
|
|
- mov rax, QWORD PTR [rcx+264]
|
|
|
- mov r8, QWORD PTR [rcx+256]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+240], rax
|
|
|
- mov QWORD PTR [rdx+248], r8
|
|
|
- mov rax, QWORD PTR [rcx+248]
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+256], rax
|
|
|
- mov QWORD PTR [rdx+264], r8
|
|
|
- mov rax, QWORD PTR [rcx+232]
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+272], rax
|
|
|
- mov QWORD PTR [rdx+280], r8
|
|
|
- mov rax, QWORD PTR [rcx+216]
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+288], rax
|
|
|
- mov QWORD PTR [rdx+296], r8
|
|
|
- mov rax, QWORD PTR [rcx+200]
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+304], rax
|
|
|
- mov QWORD PTR [rdx+312], r8
|
|
|
- mov rax, QWORD PTR [rcx+184]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+320], rax
|
|
|
- mov QWORD PTR [rdx+328], r8
|
|
|
- mov rax, QWORD PTR [rcx+168]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+336], rax
|
|
|
- mov QWORD PTR [rdx+344], r8
|
|
|
- mov rax, QWORD PTR [rcx+152]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+352], rax
|
|
|
- mov QWORD PTR [rdx+360], r8
|
|
|
- mov rax, QWORD PTR [rcx+136]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+368], rax
|
|
|
- mov QWORD PTR [rdx+376], r8
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+384], rax
|
|
|
- mov QWORD PTR [rdx+392], r8
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+400], rax
|
|
|
- mov QWORD PTR [rdx+408], r8
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+416], rax
|
|
|
- mov QWORD PTR [rdx+424], r8
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+432], rax
|
|
|
- mov QWORD PTR [rdx+440], r8
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+448], rax
|
|
|
- mov QWORD PTR [rdx+456], r8
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+464], rax
|
|
|
- mov QWORD PTR [rdx+472], r8
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+480], rax
|
|
|
- mov QWORD PTR [rdx+488], r8
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov r8, QWORD PTR [rcx] ; lowest word of r
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+496], rax
|
|
|
- mov QWORD PTR [rdx+504], r8 ; becomes the last 8 output bytes
|
|
|
- ret
|
|
|
-sp_4096_to_bin_bswap_64 ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF NO_MOVBE_SUPPORT
|
|
|
-; /* Write r as big endian to byte array.
|
|
|
-; * Fixed length number of bytes written: 512
|
|
|
-; * Uses the movbe instruction which is optional. Fully unrolled; modifies only rax and r8.
|
|
|
-; * Word order is reversed (r word at offset 504 goes to a offset 0); movbe swaps the bytes on load.
|
|
|
-; * r A single precision integer. (rcx, source, 64 words of 8 bytes)
|
|
|
-; * a Byte array. (rdx, destination, 512 bytes)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_to_bin_movbe_64 PROC
|
|
|
- movbe rax, QWORD PTR [rcx+504] ; highest word of r, byte swapped by the load
|
|
|
- movbe r8, QWORD PTR [rcx+496]
|
|
|
- mov QWORD PTR [rdx], rax ; becomes the first 8 output bytes
|
|
|
- mov QWORD PTR [rdx+8], r8
|
|
|
- movbe rax, QWORD PTR [rcx+488]
|
|
|
- movbe r8, QWORD PTR [rcx+480]
|
|
|
- mov QWORD PTR [rdx+16], rax
|
|
|
- mov QWORD PTR [rdx+24], r8
|
|
|
- movbe rax, QWORD PTR [rcx+472]
|
|
|
- movbe r8, QWORD PTR [rcx+464]
|
|
|
- mov QWORD PTR [rdx+32], rax
|
|
|
- mov QWORD PTR [rdx+40], r8
|
|
|
- movbe rax, QWORD PTR [rcx+456]
|
|
|
- movbe r8, QWORD PTR [rcx+448]
|
|
|
- mov QWORD PTR [rdx+48], rax
|
|
|
- mov QWORD PTR [rdx+56], r8
|
|
|
- movbe rax, QWORD PTR [rcx+440]
|
|
|
- movbe r8, QWORD PTR [rcx+432]
|
|
|
- mov QWORD PTR [rdx+64], rax
|
|
|
- mov QWORD PTR [rdx+72], r8
|
|
|
- movbe rax, QWORD PTR [rcx+424]
|
|
|
- movbe r8, QWORD PTR [rcx+416]
|
|
|
- mov QWORD PTR [rdx+80], rax
|
|
|
- mov QWORD PTR [rdx+88], r8
|
|
|
- movbe rax, QWORD PTR [rcx+408]
|
|
|
- movbe r8, QWORD PTR [rcx+400]
|
|
|
- mov QWORD PTR [rdx+96], rax
|
|
|
- mov QWORD PTR [rdx+104], r8
|
|
|
- movbe rax, QWORD PTR [rcx+392]
|
|
|
- movbe r8, QWORD PTR [rcx+384]
|
|
|
- mov QWORD PTR [rdx+112], rax
|
|
|
- mov QWORD PTR [rdx+120], r8
|
|
|
- movbe rax, QWORD PTR [rcx+376]
|
|
|
- movbe r8, QWORD PTR [rcx+368]
|
|
|
- mov QWORD PTR [rdx+128], rax
|
|
|
- mov QWORD PTR [rdx+136], r8
|
|
|
- movbe rax, QWORD PTR [rcx+360]
|
|
|
- movbe r8, QWORD PTR [rcx+352]
|
|
|
- mov QWORD PTR [rdx+144], rax
|
|
|
- mov QWORD PTR [rdx+152], r8
|
|
|
- movbe rax, QWORD PTR [rcx+344]
|
|
|
- movbe r8, QWORD PTR [rcx+336]
|
|
|
- mov QWORD PTR [rdx+160], rax
|
|
|
- mov QWORD PTR [rdx+168], r8
|
|
|
- movbe rax, QWORD PTR [rcx+328]
|
|
|
- movbe r8, QWORD PTR [rcx+320]
|
|
|
- mov QWORD PTR [rdx+176], rax
|
|
|
- mov QWORD PTR [rdx+184], r8
|
|
|
- movbe rax, QWORD PTR [rcx+312]
|
|
|
- movbe r8, QWORD PTR [rcx+304]
|
|
|
- mov QWORD PTR [rdx+192], rax
|
|
|
- mov QWORD PTR [rdx+200], r8
|
|
|
- movbe rax, QWORD PTR [rcx+296]
|
|
|
- movbe r8, QWORD PTR [rcx+288]
|
|
|
- mov QWORD PTR [rdx+208], rax
|
|
|
- mov QWORD PTR [rdx+216], r8
|
|
|
- movbe rax, QWORD PTR [rcx+280]
|
|
|
- movbe r8, QWORD PTR [rcx+272]
|
|
|
- mov QWORD PTR [rdx+224], rax
|
|
|
- mov QWORD PTR [rdx+232], r8
|
|
|
- movbe rax, QWORD PTR [rcx+264]
|
|
|
- movbe r8, QWORD PTR [rcx+256]
|
|
|
- mov QWORD PTR [rdx+240], rax
|
|
|
- mov QWORD PTR [rdx+248], r8
|
|
|
- movbe rax, QWORD PTR [rcx+248]
|
|
|
- movbe r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rdx+256], rax
|
|
|
- mov QWORD PTR [rdx+264], r8
|
|
|
- movbe rax, QWORD PTR [rcx+232]
|
|
|
- movbe r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rdx+272], rax
|
|
|
- mov QWORD PTR [rdx+280], r8
|
|
|
- movbe rax, QWORD PTR [rcx+216]
|
|
|
- movbe r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rdx+288], rax
|
|
|
- mov QWORD PTR [rdx+296], r8
|
|
|
- movbe rax, QWORD PTR [rcx+200]
|
|
|
- movbe r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rdx+304], rax
|
|
|
- mov QWORD PTR [rdx+312], r8
|
|
|
- movbe rax, QWORD PTR [rcx+184]
|
|
|
- movbe r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rdx+320], rax
|
|
|
- mov QWORD PTR [rdx+328], r8
|
|
|
- movbe rax, QWORD PTR [rcx+168]
|
|
|
- movbe r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rdx+336], rax
|
|
|
- mov QWORD PTR [rdx+344], r8
|
|
|
- movbe rax, QWORD PTR [rcx+152]
|
|
|
- movbe r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rdx+352], rax
|
|
|
- mov QWORD PTR [rdx+360], r8
|
|
|
- movbe rax, QWORD PTR [rcx+136]
|
|
|
- movbe r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rdx+368], rax
|
|
|
- mov QWORD PTR [rdx+376], r8
|
|
|
- movbe rax, QWORD PTR [rcx+120]
|
|
|
- movbe r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rdx+384], rax
|
|
|
- mov QWORD PTR [rdx+392], r8
|
|
|
- movbe rax, QWORD PTR [rcx+104]
|
|
|
- movbe r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rdx+400], rax
|
|
|
- mov QWORD PTR [rdx+408], r8
|
|
|
- movbe rax, QWORD PTR [rcx+88]
|
|
|
- movbe r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rdx+416], rax
|
|
|
- mov QWORD PTR [rdx+424], r8
|
|
|
- movbe rax, QWORD PTR [rcx+72]
|
|
|
- movbe r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rdx+432], rax
|
|
|
- mov QWORD PTR [rdx+440], r8
|
|
|
- movbe rax, QWORD PTR [rcx+56]
|
|
|
- movbe r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rdx+448], rax
|
|
|
- mov QWORD PTR [rdx+456], r8
|
|
|
- movbe rax, QWORD PTR [rcx+40]
|
|
|
- movbe r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rdx+464], rax
|
|
|
- mov QWORD PTR [rdx+472], r8
|
|
|
- movbe rax, QWORD PTR [rcx+24]
|
|
|
- movbe r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rdx+480], rax
|
|
|
- mov QWORD PTR [rdx+488], r8
|
|
|
- movbe rax, QWORD PTR [rcx+8]
|
|
|
- movbe r8, QWORD PTR [rcx] ; lowest word of r
|
|
|
- mov QWORD PTR [rdx+496], rax
|
|
|
- mov QWORD PTR [rdx+504], r8 ; becomes the last 8 output bytes
|
|
|
- ret
|
|
|
-sp_4096_to_bin_movbe_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Subtract b from a, storing the result in a. (a -= b)
|
|
|
-; * Returns in rax: 0 when there is no borrow out of the top word, all ones (-1) when there is.
|
|
|
-; * a A single precision integer and result. (rcx, 64 words of 8 bytes, offsets 0..504)
|
|
|
-; * b A single precision integer. (rdx, 64 words of 8 bytes; modifies r8, r9 and rax)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_sub_in_place_64 PROC
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- sub r8, QWORD PTR [rdx] ; lowest word: plain sub starts the borrow chain
|
|
|
- mov r9, QWORD PTR [rcx+8] ; the mov loads and stores between sbb steps leave the borrow flag untouched
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- sbb r9, QWORD PTR [rdx+8] ; every later word subtracts with the borrow from the word below
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb r9, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb r9, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb r9, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb r9, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb r9, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb r9, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb r9, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov r9, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- sbb r9, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], r9
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov r9, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- sbb r9, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], r9
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov r9, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- sbb r9, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], r9
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov r9, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- sbb r9, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rcx+184], r9
|
|
|
- sbb r8, QWORD PTR [rdx+192]
|
|
|
- mov r9, QWORD PTR [rcx+200]
|
|
|
- mov QWORD PTR [rcx+192], r8
|
|
|
- sbb r9, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rcx+200], r9
|
|
|
- sbb r8, QWORD PTR [rdx+208]
|
|
|
- mov r9, QWORD PTR [rcx+216]
|
|
|
- mov QWORD PTR [rcx+208], r8
|
|
|
- sbb r9, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rcx+216], r9
|
|
|
- sbb r8, QWORD PTR [rdx+224]
|
|
|
- mov r9, QWORD PTR [rcx+232]
|
|
|
- mov QWORD PTR [rcx+224], r8
|
|
|
- sbb r9, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rcx+232], r9
|
|
|
- sbb r8, QWORD PTR [rdx+240]
|
|
|
- mov r9, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [rcx+240], r8
|
|
|
- sbb r9, QWORD PTR [rdx+248]
|
|
|
- mov r8, QWORD PTR [rcx+256]
|
|
|
- mov QWORD PTR [rcx+248], r9
|
|
|
- sbb r8, QWORD PTR [rdx+256]
|
|
|
- mov r9, QWORD PTR [rcx+264]
|
|
|
- mov QWORD PTR [rcx+256], r8
|
|
|
- sbb r9, QWORD PTR [rdx+264]
|
|
|
- mov r8, QWORD PTR [rcx+272]
|
|
|
- mov QWORD PTR [rcx+264], r9
|
|
|
- sbb r8, QWORD PTR [rdx+272]
|
|
|
- mov r9, QWORD PTR [rcx+280]
|
|
|
- mov QWORD PTR [rcx+272], r8
|
|
|
- sbb r9, QWORD PTR [rdx+280]
|
|
|
- mov r8, QWORD PTR [rcx+288]
|
|
|
- mov QWORD PTR [rcx+280], r9
|
|
|
- sbb r8, QWORD PTR [rdx+288]
|
|
|
- mov r9, QWORD PTR [rcx+296]
|
|
|
- mov QWORD PTR [rcx+288], r8
|
|
|
- sbb r9, QWORD PTR [rdx+296]
|
|
|
- mov r8, QWORD PTR [rcx+304]
|
|
|
- mov QWORD PTR [rcx+296], r9
|
|
|
- sbb r8, QWORD PTR [rdx+304]
|
|
|
- mov r9, QWORD PTR [rcx+312]
|
|
|
- mov QWORD PTR [rcx+304], r8
|
|
|
- sbb r9, QWORD PTR [rdx+312]
|
|
|
- mov r8, QWORD PTR [rcx+320]
|
|
|
- mov QWORD PTR [rcx+312], r9
|
|
|
- sbb r8, QWORD PTR [rdx+320]
|
|
|
- mov r9, QWORD PTR [rcx+328]
|
|
|
- mov QWORD PTR [rcx+320], r8
|
|
|
- sbb r9, QWORD PTR [rdx+328]
|
|
|
- mov r8, QWORD PTR [rcx+336]
|
|
|
- mov QWORD PTR [rcx+328], r9
|
|
|
- sbb r8, QWORD PTR [rdx+336]
|
|
|
- mov r9, QWORD PTR [rcx+344]
|
|
|
- mov QWORD PTR [rcx+336], r8
|
|
|
- sbb r9, QWORD PTR [rdx+344]
|
|
|
- mov r8, QWORD PTR [rcx+352]
|
|
|
- mov QWORD PTR [rcx+344], r9
|
|
|
- sbb r8, QWORD PTR [rdx+352]
|
|
|
- mov r9, QWORD PTR [rcx+360]
|
|
|
- mov QWORD PTR [rcx+352], r8
|
|
|
- sbb r9, QWORD PTR [rdx+360]
|
|
|
- mov r8, QWORD PTR [rcx+368]
|
|
|
- mov QWORD PTR [rcx+360], r9
|
|
|
- sbb r8, QWORD PTR [rdx+368]
|
|
|
- mov r9, QWORD PTR [rcx+376]
|
|
|
- mov QWORD PTR [rcx+368], r8
|
|
|
- sbb r9, QWORD PTR [rdx+376]
|
|
|
- mov r8, QWORD PTR [rcx+384]
|
|
|
- mov QWORD PTR [rcx+376], r9
|
|
|
- sbb r8, QWORD PTR [rdx+384]
|
|
|
- mov r9, QWORD PTR [rcx+392]
|
|
|
- mov QWORD PTR [rcx+384], r8
|
|
|
- sbb r9, QWORD PTR [rdx+392]
|
|
|
- mov r8, QWORD PTR [rcx+400]
|
|
|
- mov QWORD PTR [rcx+392], r9
|
|
|
- sbb r8, QWORD PTR [rdx+400]
|
|
|
- mov r9, QWORD PTR [rcx+408]
|
|
|
- mov QWORD PTR [rcx+400], r8
|
|
|
- sbb r9, QWORD PTR [rdx+408]
|
|
|
- mov r8, QWORD PTR [rcx+416]
|
|
|
- mov QWORD PTR [rcx+408], r9
|
|
|
- sbb r8, QWORD PTR [rdx+416]
|
|
|
- mov r9, QWORD PTR [rcx+424]
|
|
|
- mov QWORD PTR [rcx+416], r8
|
|
|
- sbb r9, QWORD PTR [rdx+424]
|
|
|
- mov r8, QWORD PTR [rcx+432]
|
|
|
- mov QWORD PTR [rcx+424], r9
|
|
|
- sbb r8, QWORD PTR [rdx+432]
|
|
|
- mov r9, QWORD PTR [rcx+440]
|
|
|
- mov QWORD PTR [rcx+432], r8
|
|
|
- sbb r9, QWORD PTR [rdx+440]
|
|
|
- mov r8, QWORD PTR [rcx+448]
|
|
|
- mov QWORD PTR [rcx+440], r9
|
|
|
- sbb r8, QWORD PTR [rdx+448]
|
|
|
- mov r9, QWORD PTR [rcx+456]
|
|
|
- mov QWORD PTR [rcx+448], r8
|
|
|
- sbb r9, QWORD PTR [rdx+456]
|
|
|
- mov r8, QWORD PTR [rcx+464]
|
|
|
- mov QWORD PTR [rcx+456], r9
|
|
|
- sbb r8, QWORD PTR [rdx+464]
|
|
|
- mov r9, QWORD PTR [rcx+472]
|
|
|
- mov QWORD PTR [rcx+464], r8
|
|
|
- sbb r9, QWORD PTR [rdx+472]
|
|
|
- mov r8, QWORD PTR [rcx+480]
|
|
|
- mov QWORD PTR [rcx+472], r9
|
|
|
- sbb r8, QWORD PTR [rdx+480]
|
|
|
- mov r9, QWORD PTR [rcx+488]
|
|
|
- mov QWORD PTR [rcx+480], r8
|
|
|
- sbb r9, QWORD PTR [rdx+488]
|
|
|
- mov r8, QWORD PTR [rcx+496]
|
|
|
- mov QWORD PTR [rcx+488], r9
|
|
|
- sbb r8, QWORD PTR [rdx+496]
|
|
|
- mov r9, QWORD PTR [rcx+504]
|
|
|
- mov QWORD PTR [rcx+496], r8
|
|
|
- sbb r9, QWORD PTR [rdx+504]
|
|
|
- mov QWORD PTR [rcx+504], r9
|
|
|
- sbb rax, rax ; rax = 0 - borrow: 0 if no borrow out of the top word, -1 otherwise
|
|
|
- ret
|
|
|
-sp_4096_sub_in_place_64 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Add b to a, storing the sum in r. (r = a + b)
|
|
|
-; * Returns in rax the carry out of the top word (0 or 1). Modifies r9, r10 and rax.
|
|
|
-; * r A single precision integer. (rcx, result, 64 words of 8 bytes, offsets 0..504)
|
|
|
-; * a A single precision integer. (rdx, 64 words of 8 bytes)
|
|
|
-; * b A single precision integer. (r8, 64 words of 8 bytes)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_add_64 PROC
|
|
|
- ; Add the 64 words from lowest to highest, carrying between words
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- xor rax, rax ; rax = 0 so the final adc leaves just the carry in it
|
|
|
- add r9, QWORD PTR [r8] ; lowest word: plain add starts the carry chain
|
|
|
- mov r10, QWORD PTR [rdx+8] ; the mov loads and stores between adc steps leave the carry flag untouched
|
|
|
- mov QWORD PTR [rcx], r9
|
|
|
- adc r10, QWORD PTR [r8+8] ; every later word adds with the carry from the word below
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- adc r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- adc r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- adc r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- adc r10, QWORD PTR [r8+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r9, QWORD PTR [r8+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [rcx+128], r9
|
|
|
- adc r10, QWORD PTR [r8+136]
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [rcx+136], r10
|
|
|
- adc r9, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- adc r10, QWORD PTR [r8+152]
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- adc r9, QWORD PTR [r8+160]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [rcx+160], r9
|
|
|
- adc r10, QWORD PTR [r8+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r9, QWORD PTR [r8+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [rcx+176], r9
|
|
|
- adc r10, QWORD PTR [r8+184]
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [rcx+184], r10
|
|
|
- adc r9, QWORD PTR [r8+192]
|
|
|
- mov r10, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [rcx+192], r9
|
|
|
- adc r10, QWORD PTR [r8+200]
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [rcx+200], r10
|
|
|
- adc r9, QWORD PTR [r8+208]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [rcx+208], r9
|
|
|
- adc r10, QWORD PTR [r8+216]
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- adc r9, QWORD PTR [r8+224]
|
|
|
- mov r10, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [rcx+224], r9
|
|
|
- adc r10, QWORD PTR [r8+232]
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [rcx+232], r10
|
|
|
- adc r9, QWORD PTR [r8+240]
|
|
|
- mov r10, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [rcx+240], r9
|
|
|
- adc r10, QWORD PTR [r8+248]
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- mov QWORD PTR [rcx+248], r10
|
|
|
- adc r9, QWORD PTR [r8+256]
|
|
|
- mov r10, QWORD PTR [rdx+264]
|
|
|
- mov QWORD PTR [rcx+256], r9
|
|
|
- adc r10, QWORD PTR [r8+264]
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- mov QWORD PTR [rcx+264], r10
|
|
|
- adc r9, QWORD PTR [r8+272]
|
|
|
- mov r10, QWORD PTR [rdx+280]
|
|
|
- mov QWORD PTR [rcx+272], r9
|
|
|
- adc r10, QWORD PTR [r8+280]
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- mov QWORD PTR [rcx+280], r10
|
|
|
- adc r9, QWORD PTR [r8+288]
|
|
|
- mov r10, QWORD PTR [rdx+296]
|
|
|
- mov QWORD PTR [rcx+288], r9
|
|
|
- adc r10, QWORD PTR [r8+296]
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- mov QWORD PTR [rcx+296], r10
|
|
|
- adc r9, QWORD PTR [r8+304]
|
|
|
- mov r10, QWORD PTR [rdx+312]
|
|
|
- mov QWORD PTR [rcx+304], r9
|
|
|
- adc r10, QWORD PTR [r8+312]
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- mov QWORD PTR [rcx+312], r10
|
|
|
- adc r9, QWORD PTR [r8+320]
|
|
|
- mov r10, QWORD PTR [rdx+328]
|
|
|
- mov QWORD PTR [rcx+320], r9
|
|
|
- adc r10, QWORD PTR [r8+328]
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- mov QWORD PTR [rcx+328], r10
|
|
|
- adc r9, QWORD PTR [r8+336]
|
|
|
- mov r10, QWORD PTR [rdx+344]
|
|
|
- mov QWORD PTR [rcx+336], r9
|
|
|
- adc r10, QWORD PTR [r8+344]
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- mov QWORD PTR [rcx+344], r10
|
|
|
- adc r9, QWORD PTR [r8+352]
|
|
|
- mov r10, QWORD PTR [rdx+360]
|
|
|
- mov QWORD PTR [rcx+352], r9
|
|
|
- adc r10, QWORD PTR [r8+360]
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- mov QWORD PTR [rcx+360], r10
|
|
|
- adc r9, QWORD PTR [r8+368]
|
|
|
- mov r10, QWORD PTR [rdx+376]
|
|
|
- mov QWORD PTR [rcx+368], r9
|
|
|
- adc r10, QWORD PTR [r8+376]
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- mov QWORD PTR [rcx+376], r10
|
|
|
- adc r9, QWORD PTR [r8+384]
|
|
|
- mov r10, QWORD PTR [rdx+392]
|
|
|
- mov QWORD PTR [rcx+384], r9
|
|
|
- adc r10, QWORD PTR [r8+392]
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- mov QWORD PTR [rcx+392], r10
|
|
|
- adc r9, QWORD PTR [r8+400]
|
|
|
- mov r10, QWORD PTR [rdx+408]
|
|
|
- mov QWORD PTR [rcx+400], r9
|
|
|
- adc r10, QWORD PTR [r8+408]
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- mov QWORD PTR [rcx+408], r10
|
|
|
- adc r9, QWORD PTR [r8+416]
|
|
|
- mov r10, QWORD PTR [rdx+424]
|
|
|
- mov QWORD PTR [rcx+416], r9
|
|
|
- adc r10, QWORD PTR [r8+424]
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- mov QWORD PTR [rcx+424], r10
|
|
|
- adc r9, QWORD PTR [r8+432]
|
|
|
- mov r10, QWORD PTR [rdx+440]
|
|
|
- mov QWORD PTR [rcx+432], r9
|
|
|
- adc r10, QWORD PTR [r8+440]
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- mov QWORD PTR [rcx+440], r10
|
|
|
- adc r9, QWORD PTR [r8+448]
|
|
|
- mov r10, QWORD PTR [rdx+456]
|
|
|
- mov QWORD PTR [rcx+448], r9
|
|
|
- adc r10, QWORD PTR [r8+456]
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- mov QWORD PTR [rcx+456], r10
|
|
|
- adc r9, QWORD PTR [r8+464]
|
|
|
- mov r10, QWORD PTR [rdx+472]
|
|
|
- mov QWORD PTR [rcx+464], r9
|
|
|
- adc r10, QWORD PTR [r8+472]
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- mov QWORD PTR [rcx+472], r10
|
|
|
- adc r9, QWORD PTR [r8+480]
|
|
|
- mov r10, QWORD PTR [rdx+488]
|
|
|
- mov QWORD PTR [rcx+480], r9
|
|
|
- adc r10, QWORD PTR [r8+488]
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- mov QWORD PTR [rcx+488], r10
|
|
|
- adc r9, QWORD PTR [r8+496]
|
|
|
- mov r10, QWORD PTR [rdx+504]
|
|
|
- mov QWORD PTR [rcx+496], r9
|
|
|
- adc r10, QWORD PTR [r8+504]
|
|
|
- mov QWORD PTR [rcx+504], r10
|
|
|
- adc rax, 0 ; rax = carry out of the top word (0 or 1)
|
|
|
- ret
|
|
|
-sp_4096_add_64 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_mul_64 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 1576
|
|
|
- mov QWORD PTR [rsp+1536], rcx
|
|
|
- mov QWORD PTR [rsp+1544], rdx
|
|
|
- mov QWORD PTR [rsp+1552], r8
|
|
|
- lea r12, QWORD PTR [rsp+1024]
|
|
|
- lea r14, QWORD PTR [rdx+256]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov rax, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- adc rax, QWORD PTR [r14+192]
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- adc r9, QWORD PTR [r14+200]
|
|
|
- mov r10, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- adc r10, QWORD PTR [r14+208]
|
|
|
- mov rax, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- adc rax, QWORD PTR [r14+216]
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- adc r9, QWORD PTR [r14+224]
|
|
|
- mov r10, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- adc r10, QWORD PTR [r14+232]
|
|
|
- mov rax, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- adc rax, QWORD PTR [r14+240]
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- adc r9, QWORD PTR [r14+248]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+1560], r15
|
|
|
- lea r13, QWORD PTR [rsp+1280]
|
|
|
- lea r14, QWORD PTR [r8+256]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [r8+104]
|
|
|
- mov QWORD PTR [r13+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [r13+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [r8+128]
|
|
|
- mov QWORD PTR [r13+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [r8+136]
|
|
|
- mov QWORD PTR [r13+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [r8+144]
|
|
|
- mov QWORD PTR [r13+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [r8+152]
|
|
|
- mov QWORD PTR [r13+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov QWORD PTR [r13+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [r8+168]
|
|
|
- mov QWORD PTR [r13+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [r8+176]
|
|
|
- mov QWORD PTR [r13+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [r8+184]
|
|
|
- mov QWORD PTR [r13+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov rax, QWORD PTR [r8+192]
|
|
|
- mov QWORD PTR [r13+184], r10
|
|
|
- adc rax, QWORD PTR [r14+192]
|
|
|
- mov r9, QWORD PTR [r8+200]
|
|
|
- mov QWORD PTR [r13+192], rax
|
|
|
- adc r9, QWORD PTR [r14+200]
|
|
|
- mov r10, QWORD PTR [r8+208]
|
|
|
- mov QWORD PTR [r13+200], r9
|
|
|
- adc r10, QWORD PTR [r14+208]
|
|
|
- mov rax, QWORD PTR [r8+216]
|
|
|
- mov QWORD PTR [r13+208], r10
|
|
|
- adc rax, QWORD PTR [r14+216]
|
|
|
- mov r9, QWORD PTR [r8+224]
|
|
|
- mov QWORD PTR [r13+216], rax
|
|
|
- adc r9, QWORD PTR [r14+224]
|
|
|
- mov r10, QWORD PTR [r8+232]
|
|
|
- mov QWORD PTR [r13+224], r9
|
|
|
- adc r10, QWORD PTR [r14+232]
|
|
|
- mov rax, QWORD PTR [r8+240]
|
|
|
- mov QWORD PTR [r13+232], r10
|
|
|
- adc rax, QWORD PTR [r14+240]
|
|
|
- mov r9, QWORD PTR [r8+248]
|
|
|
- mov QWORD PTR [r13+240], rax
|
|
|
- adc r9, QWORD PTR [r14+248]
|
|
|
- mov QWORD PTR [r13+248], r9
|
|
|
- adc rdi, 0
|
|
|
- mov QWORD PTR [rsp+1568], rdi
|
|
|
- mov r8, r13
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp
|
|
|
- call sp_2048_mul_32
|
|
|
- mov r8, QWORD PTR [rsp+1552]
|
|
|
- mov rdx, QWORD PTR [rsp+1544]
|
|
|
- lea rcx, QWORD PTR [rsp+512]
|
|
|
- add r8, 256
|
|
|
- add rdx, 256
|
|
|
- call sp_2048_mul_32
|
|
|
- mov r8, QWORD PTR [rsp+1552]
|
|
|
- mov rdx, QWORD PTR [rsp+1544]
|
|
|
- mov rcx, QWORD PTR [rsp+1536]
|
|
|
- call sp_2048_mul_32
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+1552]
|
|
|
- mov rdx, QWORD PTR [rsp+1544]
|
|
|
- mov rcx, QWORD PTR [rsp+1536]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+1560]
|
|
|
- mov rdi, QWORD PTR [rsp+1568]
|
|
|
- mov rsi, QWORD PTR [rsp+1536]
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+1024]
|
|
|
- lea r13, QWORD PTR [rsp+1280]
|
|
|
- and r11, rdi
|
|
|
- neg r15
|
|
|
- neg rdi
|
|
|
- add rsi, 512
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- mov QWORD PTR [r13], r9
|
|
|
- mov rax, QWORD PTR [r12+8]
|
|
|
- mov r9, QWORD PTR [r13+8]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+8], rax
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- mov rax, QWORD PTR [r12+16]
|
|
|
- mov r9, QWORD PTR [r13+16]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+16], rax
|
|
|
- mov QWORD PTR [r13+16], r9
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- mov QWORD PTR [r13+24], r9
|
|
|
- mov rax, QWORD PTR [r12+32]
|
|
|
- mov r9, QWORD PTR [r13+32]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+32], rax
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- mov rax, QWORD PTR [r12+40]
|
|
|
- mov r9, QWORD PTR [r13+40]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+40], rax
|
|
|
- mov QWORD PTR [r13+40], r9
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- mov QWORD PTR [r13+48], r9
|
|
|
- mov rax, QWORD PTR [r12+56]
|
|
|
- mov r9, QWORD PTR [r13+56]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+56], rax
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- mov rax, QWORD PTR [r12+64]
|
|
|
- mov r9, QWORD PTR [r13+64]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+64], rax
|
|
|
- mov QWORD PTR [r13+64], r9
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- mov QWORD PTR [r13+72], r9
|
|
|
- mov rax, QWORD PTR [r12+80]
|
|
|
- mov r9, QWORD PTR [r13+80]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+80], rax
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- mov rax, QWORD PTR [r12+88]
|
|
|
- mov r9, QWORD PTR [r13+88]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+88], rax
|
|
|
- mov QWORD PTR [r13+88], r9
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [r13+96]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- mov QWORD PTR [r13+96], r9
|
|
|
- mov rax, QWORD PTR [r12+104]
|
|
|
- mov r9, QWORD PTR [r13+104]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+104], rax
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- mov rax, QWORD PTR [r12+112]
|
|
|
- mov r9, QWORD PTR [r13+112]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+112], rax
|
|
|
- mov QWORD PTR [r13+112], r9
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [r13+120]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- mov QWORD PTR [r13+120], r9
|
|
|
- mov rax, QWORD PTR [r12+128]
|
|
|
- mov r9, QWORD PTR [r13+128]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+128], rax
|
|
|
- mov QWORD PTR [r13+128], r9
|
|
|
- mov rax, QWORD PTR [r12+136]
|
|
|
- mov r9, QWORD PTR [r13+136]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+136], rax
|
|
|
- mov QWORD PTR [r13+136], r9
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [r13+144]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- mov QWORD PTR [r13+144], r9
|
|
|
- mov rax, QWORD PTR [r12+152]
|
|
|
- mov r9, QWORD PTR [r13+152]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+152], rax
|
|
|
- mov QWORD PTR [r13+152], r9
|
|
|
- mov rax, QWORD PTR [r12+160]
|
|
|
- mov r9, QWORD PTR [r13+160]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+160], rax
|
|
|
- mov QWORD PTR [r13+160], r9
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [r13+168]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- mov QWORD PTR [r13+168], r9
|
|
|
- mov rax, QWORD PTR [r12+176]
|
|
|
- mov r9, QWORD PTR [r13+176]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+176], rax
|
|
|
- mov QWORD PTR [r13+176], r9
|
|
|
- mov rax, QWORD PTR [r12+184]
|
|
|
- mov r9, QWORD PTR [r13+184]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+184], rax
|
|
|
- mov QWORD PTR [r13+184], r9
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [r13+192]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- mov QWORD PTR [r13+192], r9
|
|
|
- mov rax, QWORD PTR [r12+200]
|
|
|
- mov r9, QWORD PTR [r13+200]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+200], rax
|
|
|
- mov QWORD PTR [r13+200], r9
|
|
|
- mov rax, QWORD PTR [r12+208]
|
|
|
- mov r9, QWORD PTR [r13+208]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+208], rax
|
|
|
- mov QWORD PTR [r13+208], r9
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [r13+216]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- mov QWORD PTR [r13+216], r9
|
|
|
- mov rax, QWORD PTR [r12+224]
|
|
|
- mov r9, QWORD PTR [r13+224]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+224], rax
|
|
|
- mov QWORD PTR [r13+224], r9
|
|
|
- mov rax, QWORD PTR [r12+232]
|
|
|
- mov r9, QWORD PTR [r13+232]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+232], rax
|
|
|
- mov QWORD PTR [r13+232], r9
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [r13+240]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- mov QWORD PTR [r13+240], r9
|
|
|
- mov rax, QWORD PTR [r12+248]
|
|
|
- mov r9, QWORD PTR [r13+248]
|
|
|
- and rax, rdi
|
|
|
- and r9, r15
|
|
|
- mov QWORD PTR [r12+248], rax
|
|
|
- mov QWORD PTR [r13+248], r9
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r13+248]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r11, 0
|
|
|
- lea r13, QWORD PTR [rsp+512]
|
|
|
- mov r12, rsp
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [r13+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [r13+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [r13+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [r13+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [r13+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [r13+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [r13+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [r13+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [r13+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [r13+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [r13+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [r13+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [r13+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [r13+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [r13+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [r13+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [r13+376]
|
|
|
- mov rax, QWORD PTR [r12+384]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb rax, QWORD PTR [r13+384]
|
|
|
- mov r9, QWORD PTR [r12+392]
|
|
|
- mov QWORD PTR [r12+384], rax
|
|
|
- sbb r9, QWORD PTR [r13+392]
|
|
|
- mov r10, QWORD PTR [r12+400]
|
|
|
- mov QWORD PTR [r12+392], r9
|
|
|
- sbb r10, QWORD PTR [r13+400]
|
|
|
- mov rax, QWORD PTR [r12+408]
|
|
|
- mov QWORD PTR [r12+400], r10
|
|
|
- sbb rax, QWORD PTR [r13+408]
|
|
|
- mov r9, QWORD PTR [r12+416]
|
|
|
- mov QWORD PTR [r12+408], rax
|
|
|
- sbb r9, QWORD PTR [r13+416]
|
|
|
- mov r10, QWORD PTR [r12+424]
|
|
|
- mov QWORD PTR [r12+416], r9
|
|
|
- sbb r10, QWORD PTR [r13+424]
|
|
|
- mov rax, QWORD PTR [r12+432]
|
|
|
- mov QWORD PTR [r12+424], r10
|
|
|
- sbb rax, QWORD PTR [r13+432]
|
|
|
- mov r9, QWORD PTR [r12+440]
|
|
|
- mov QWORD PTR [r12+432], rax
|
|
|
- sbb r9, QWORD PTR [r13+440]
|
|
|
- mov r10, QWORD PTR [r12+448]
|
|
|
- mov QWORD PTR [r12+440], r9
|
|
|
- sbb r10, QWORD PTR [r13+448]
|
|
|
- mov rax, QWORD PTR [r12+456]
|
|
|
- mov QWORD PTR [r12+448], r10
|
|
|
- sbb rax, QWORD PTR [r13+456]
|
|
|
- mov r9, QWORD PTR [r12+464]
|
|
|
- mov QWORD PTR [r12+456], rax
|
|
|
- sbb r9, QWORD PTR [r13+464]
|
|
|
- mov r10, QWORD PTR [r12+472]
|
|
|
- mov QWORD PTR [r12+464], r9
|
|
|
- sbb r10, QWORD PTR [r13+472]
|
|
|
- mov rax, QWORD PTR [r12+480]
|
|
|
- mov QWORD PTR [r12+472], r10
|
|
|
- sbb rax, QWORD PTR [r13+480]
|
|
|
- mov r9, QWORD PTR [r12+488]
|
|
|
- mov QWORD PTR [r12+480], rax
|
|
|
- sbb r9, QWORD PTR [r13+488]
|
|
|
- mov r10, QWORD PTR [r12+496]
|
|
|
- mov QWORD PTR [r12+488], r9
|
|
|
- sbb r10, QWORD PTR [r13+496]
|
|
|
- mov rax, QWORD PTR [r12+504]
|
|
|
- mov QWORD PTR [r12+496], r10
|
|
|
- sbb rax, QWORD PTR [r13+504]
|
|
|
- mov QWORD PTR [r12+504], rax
|
|
|
- sbb r11, 0
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [rcx+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [rcx+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [rcx+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [rcx+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [rcx+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [rcx+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [rcx+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [rcx+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [rcx+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [rcx+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [rcx+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [rcx+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [rcx+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [rcx+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [rcx+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [rcx+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [rcx+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [rcx+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [rcx+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [rcx+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [rcx+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [rcx+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [rcx+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [rcx+376]
|
|
|
- mov rax, QWORD PTR [r12+384]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb rax, QWORD PTR [rcx+384]
|
|
|
- mov r9, QWORD PTR [r12+392]
|
|
|
- mov QWORD PTR [r12+384], rax
|
|
|
- sbb r9, QWORD PTR [rcx+392]
|
|
|
- mov r10, QWORD PTR [r12+400]
|
|
|
- mov QWORD PTR [r12+392], r9
|
|
|
- sbb r10, QWORD PTR [rcx+400]
|
|
|
- mov rax, QWORD PTR [r12+408]
|
|
|
- mov QWORD PTR [r12+400], r10
|
|
|
- sbb rax, QWORD PTR [rcx+408]
|
|
|
- mov r9, QWORD PTR [r12+416]
|
|
|
- mov QWORD PTR [r12+408], rax
|
|
|
- sbb r9, QWORD PTR [rcx+416]
|
|
|
- mov r10, QWORD PTR [r12+424]
|
|
|
- mov QWORD PTR [r12+416], r9
|
|
|
- sbb r10, QWORD PTR [rcx+424]
|
|
|
- mov rax, QWORD PTR [r12+432]
|
|
|
- mov QWORD PTR [r12+424], r10
|
|
|
- sbb rax, QWORD PTR [rcx+432]
|
|
|
- mov r9, QWORD PTR [r12+440]
|
|
|
- mov QWORD PTR [r12+432], rax
|
|
|
- sbb r9, QWORD PTR [rcx+440]
|
|
|
- mov r10, QWORD PTR [r12+448]
|
|
|
- mov QWORD PTR [r12+440], r9
|
|
|
- sbb r10, QWORD PTR [rcx+448]
|
|
|
- mov rax, QWORD PTR [r12+456]
|
|
|
- mov QWORD PTR [r12+448], r10
|
|
|
- sbb rax, QWORD PTR [rcx+456]
|
|
|
- mov r9, QWORD PTR [r12+464]
|
|
|
- mov QWORD PTR [r12+456], rax
|
|
|
- sbb r9, QWORD PTR [rcx+464]
|
|
|
- mov r10, QWORD PTR [r12+472]
|
|
|
- mov QWORD PTR [r12+464], r9
|
|
|
- sbb r10, QWORD PTR [rcx+472]
|
|
|
- mov rax, QWORD PTR [r12+480]
|
|
|
- mov QWORD PTR [r12+472], r10
|
|
|
- sbb rax, QWORD PTR [rcx+480]
|
|
|
- mov r9, QWORD PTR [r12+488]
|
|
|
- mov QWORD PTR [r12+480], rax
|
|
|
- sbb r9, QWORD PTR [rcx+488]
|
|
|
- mov r10, QWORD PTR [r12+496]
|
|
|
- mov QWORD PTR [r12+488], r9
|
|
|
- sbb r10, QWORD PTR [rcx+496]
|
|
|
- mov rax, QWORD PTR [r12+504]
|
|
|
- mov QWORD PTR [r12+496], r10
|
|
|
- sbb rax, QWORD PTR [rcx+504]
|
|
|
- mov QWORD PTR [r12+504], rax
|
|
|
- sbb r11, 0
|
|
|
- sub rsi, 256
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r12+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r12+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r12+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r12+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r12+248]
|
|
|
- mov r10, QWORD PTR [rsi+256]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r10, QWORD PTR [r12+256]
|
|
|
- mov rax, QWORD PTR [rsi+264]
|
|
|
- mov QWORD PTR [rsi+256], r10
|
|
|
- adc rax, QWORD PTR [r12+264]
|
|
|
- mov r9, QWORD PTR [rsi+272]
|
|
|
- mov QWORD PTR [rsi+264], rax
|
|
|
- adc r9, QWORD PTR [r12+272]
|
|
|
- mov r10, QWORD PTR [rsi+280]
|
|
|
- mov QWORD PTR [rsi+272], r9
|
|
|
- adc r10, QWORD PTR [r12+280]
|
|
|
- mov rax, QWORD PTR [rsi+288]
|
|
|
- mov QWORD PTR [rsi+280], r10
|
|
|
- adc rax, QWORD PTR [r12+288]
|
|
|
- mov r9, QWORD PTR [rsi+296]
|
|
|
- mov QWORD PTR [rsi+288], rax
|
|
|
- adc r9, QWORD PTR [r12+296]
|
|
|
- mov r10, QWORD PTR [rsi+304]
|
|
|
- mov QWORD PTR [rsi+296], r9
|
|
|
- adc r10, QWORD PTR [r12+304]
|
|
|
- mov rax, QWORD PTR [rsi+312]
|
|
|
- mov QWORD PTR [rsi+304], r10
|
|
|
- adc rax, QWORD PTR [r12+312]
|
|
|
- mov r9, QWORD PTR [rsi+320]
|
|
|
- mov QWORD PTR [rsi+312], rax
|
|
|
- adc r9, QWORD PTR [r12+320]
|
|
|
- mov r10, QWORD PTR [rsi+328]
|
|
|
- mov QWORD PTR [rsi+320], r9
|
|
|
- adc r10, QWORD PTR [r12+328]
|
|
|
- mov rax, QWORD PTR [rsi+336]
|
|
|
- mov QWORD PTR [rsi+328], r10
|
|
|
- adc rax, QWORD PTR [r12+336]
|
|
|
- mov r9, QWORD PTR [rsi+344]
|
|
|
- mov QWORD PTR [rsi+336], rax
|
|
|
- adc r9, QWORD PTR [r12+344]
|
|
|
- mov r10, QWORD PTR [rsi+352]
|
|
|
- mov QWORD PTR [rsi+344], r9
|
|
|
- adc r10, QWORD PTR [r12+352]
|
|
|
- mov rax, QWORD PTR [rsi+360]
|
|
|
- mov QWORD PTR [rsi+352], r10
|
|
|
- adc rax, QWORD PTR [r12+360]
|
|
|
- mov r9, QWORD PTR [rsi+368]
|
|
|
- mov QWORD PTR [rsi+360], rax
|
|
|
- adc r9, QWORD PTR [r12+368]
|
|
|
- mov r10, QWORD PTR [rsi+376]
|
|
|
- mov QWORD PTR [rsi+368], r9
|
|
|
- adc r10, QWORD PTR [r12+376]
|
|
|
- mov rax, QWORD PTR [rsi+384]
|
|
|
- mov QWORD PTR [rsi+376], r10
|
|
|
- adc rax, QWORD PTR [r12+384]
|
|
|
- mov r9, QWORD PTR [rsi+392]
|
|
|
- mov QWORD PTR [rsi+384], rax
|
|
|
- adc r9, QWORD PTR [r12+392]
|
|
|
- mov r10, QWORD PTR [rsi+400]
|
|
|
- mov QWORD PTR [rsi+392], r9
|
|
|
- adc r10, QWORD PTR [r12+400]
|
|
|
- mov rax, QWORD PTR [rsi+408]
|
|
|
- mov QWORD PTR [rsi+400], r10
|
|
|
- adc rax, QWORD PTR [r12+408]
|
|
|
- mov r9, QWORD PTR [rsi+416]
|
|
|
- mov QWORD PTR [rsi+408], rax
|
|
|
- adc r9, QWORD PTR [r12+416]
|
|
|
- mov r10, QWORD PTR [rsi+424]
|
|
|
- mov QWORD PTR [rsi+416], r9
|
|
|
- adc r10, QWORD PTR [r12+424]
|
|
|
- mov rax, QWORD PTR [rsi+432]
|
|
|
- mov QWORD PTR [rsi+424], r10
|
|
|
- adc rax, QWORD PTR [r12+432]
|
|
|
- mov r9, QWORD PTR [rsi+440]
|
|
|
- mov QWORD PTR [rsi+432], rax
|
|
|
- adc r9, QWORD PTR [r12+440]
|
|
|
- mov r10, QWORD PTR [rsi+448]
|
|
|
- mov QWORD PTR [rsi+440], r9
|
|
|
- adc r10, QWORD PTR [r12+448]
|
|
|
- mov rax, QWORD PTR [rsi+456]
|
|
|
- mov QWORD PTR [rsi+448], r10
|
|
|
- adc rax, QWORD PTR [r12+456]
|
|
|
- mov r9, QWORD PTR [rsi+464]
|
|
|
- mov QWORD PTR [rsi+456], rax
|
|
|
- adc r9, QWORD PTR [r12+464]
|
|
|
- mov r10, QWORD PTR [rsi+472]
|
|
|
- mov QWORD PTR [rsi+464], r9
|
|
|
- adc r10, QWORD PTR [r12+472]
|
|
|
- mov rax, QWORD PTR [rsi+480]
|
|
|
- mov QWORD PTR [rsi+472], r10
|
|
|
- adc rax, QWORD PTR [r12+480]
|
|
|
- mov r9, QWORD PTR [rsi+488]
|
|
|
- mov QWORD PTR [rsi+480], rax
|
|
|
- adc r9, QWORD PTR [r12+488]
|
|
|
- mov r10, QWORD PTR [rsi+496]
|
|
|
- mov QWORD PTR [rsi+488], r9
|
|
|
- adc r10, QWORD PTR [r12+496]
|
|
|
- mov rax, QWORD PTR [rsi+504]
|
|
|
- mov QWORD PTR [rsi+496], r10
|
|
|
- adc rax, QWORD PTR [r12+504]
|
|
|
- mov QWORD PTR [rsi+504], rax
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+768], r11
|
|
|
- add rsi, 256
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r13+248]
|
|
|
- mov r10, QWORD PTR [rsi+256]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r10, QWORD PTR [r13+256]
|
|
|
- mov QWORD PTR [rsi+256], r10
|
|
|
- ; Add to zero
|
|
|
- mov rax, QWORD PTR [r13+264]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+272]
|
|
|
- mov QWORD PTR [rsi+264], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+280]
|
|
|
- mov QWORD PTR [rsi+272], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+288]
|
|
|
- mov QWORD PTR [rsi+280], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+296]
|
|
|
- mov QWORD PTR [rsi+288], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+304]
|
|
|
- mov QWORD PTR [rsi+296], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+312]
|
|
|
- mov QWORD PTR [rsi+304], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+320]
|
|
|
- mov QWORD PTR [rsi+312], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+328]
|
|
|
- mov QWORD PTR [rsi+320], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+336]
|
|
|
- mov QWORD PTR [rsi+328], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+344]
|
|
|
- mov QWORD PTR [rsi+336], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+352]
|
|
|
- mov QWORD PTR [rsi+344], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+360]
|
|
|
- mov QWORD PTR [rsi+352], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+368]
|
|
|
- mov QWORD PTR [rsi+360], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+376]
|
|
|
- mov QWORD PTR [rsi+368], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+384]
|
|
|
- mov QWORD PTR [rsi+376], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+392]
|
|
|
- mov QWORD PTR [rsi+384], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+400]
|
|
|
- mov QWORD PTR [rsi+392], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+408]
|
|
|
- mov QWORD PTR [rsi+400], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+416]
|
|
|
- mov QWORD PTR [rsi+408], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+424]
|
|
|
- mov QWORD PTR [rsi+416], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+432]
|
|
|
- mov QWORD PTR [rsi+424], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+440]
|
|
|
- mov QWORD PTR [rsi+432], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+448]
|
|
|
- mov QWORD PTR [rsi+440], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+456]
|
|
|
- mov QWORD PTR [rsi+448], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+464]
|
|
|
- mov QWORD PTR [rsi+456], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+472]
|
|
|
- mov QWORD PTR [rsi+464], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+480]
|
|
|
- mov QWORD PTR [rsi+472], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+488]
|
|
|
- mov QWORD PTR [rsi+480], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+496]
|
|
|
- mov QWORD PTR [rsi+488], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+504]
|
|
|
- mov QWORD PTR [rsi+496], r10
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rsi+504], rax
|
|
|
- add rsp, 1576
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_mul_64 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r Result of the multiplication, a single precision integer.
|
|
|
-; * a First operand, a single precision integer.
|
|
|
-; * b Second operand, a single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_mul_avx2_64 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- sub rsp, 1576
|
|
|
- mov QWORD PTR [rsp+1536], rcx
|
|
|
- mov QWORD PTR [rsp+1544], rdx
|
|
|
- mov QWORD PTR [rsp+1552], r8
|
|
|
- lea r12, QWORD PTR [rsp+1024]
|
|
|
- lea r14, QWORD PTR [rdx+256]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor r15, r15
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov rax, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- adc rax, QWORD PTR [r14+192]
|
|
|
- mov r9, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- adc r9, QWORD PTR [r14+200]
|
|
|
- mov r10, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- adc r10, QWORD PTR [r14+208]
|
|
|
- mov rax, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- adc rax, QWORD PTR [r14+216]
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- adc r9, QWORD PTR [r14+224]
|
|
|
- mov r10, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- adc r10, QWORD PTR [r14+232]
|
|
|
- mov rax, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- adc rax, QWORD PTR [r14+240]
|
|
|
- mov r9, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- adc r9, QWORD PTR [r14+248]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+1560], r15
|
|
|
- lea r13, QWORD PTR [rsp+1280]
|
|
|
- lea r14, QWORD PTR [r8+256]
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- xor rdi, rdi
|
|
|
- add rax, QWORD PTR [r14]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov QWORD PTR [r13], rax
|
|
|
- adc r9, QWORD PTR [r14+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov QWORD PTR [r13+8], r9
|
|
|
- adc r10, QWORD PTR [r14+16]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [r13+16], r10
|
|
|
- adc rax, QWORD PTR [r14+24]
|
|
|
- mov r9, QWORD PTR [r8+32]
|
|
|
- mov QWORD PTR [r13+24], rax
|
|
|
- adc r9, QWORD PTR [r14+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov QWORD PTR [r13+32], r9
|
|
|
- adc r10, QWORD PTR [r14+40]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov QWORD PTR [r13+40], r10
|
|
|
- adc rax, QWORD PTR [r14+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [r13+48], rax
|
|
|
- adc r9, QWORD PTR [r14+56]
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [r13+56], r9
|
|
|
- adc r10, QWORD PTR [r14+64]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mov QWORD PTR [r13+64], r10
|
|
|
- adc rax, QWORD PTR [r14+72]
|
|
|
- mov r9, QWORD PTR [r8+80]
|
|
|
- mov QWORD PTR [r13+72], rax
|
|
|
- adc r9, QWORD PTR [r14+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [r13+80], r9
|
|
|
- adc r10, QWORD PTR [r14+88]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mov QWORD PTR [r13+88], r10
|
|
|
- adc rax, QWORD PTR [r14+96]
|
|
|
- mov r9, QWORD PTR [r8+104]
|
|
|
- mov QWORD PTR [r13+96], rax
|
|
|
- adc r9, QWORD PTR [r14+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov QWORD PTR [r13+104], r9
|
|
|
- adc r10, QWORD PTR [r14+112]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [r13+112], r10
|
|
|
- adc rax, QWORD PTR [r14+120]
|
|
|
- mov r9, QWORD PTR [r8+128]
|
|
|
- mov QWORD PTR [r13+120], rax
|
|
|
- adc r9, QWORD PTR [r14+128]
|
|
|
- mov r10, QWORD PTR [r8+136]
|
|
|
- mov QWORD PTR [r13+128], r9
|
|
|
- adc r10, QWORD PTR [r14+136]
|
|
|
- mov rax, QWORD PTR [r8+144]
|
|
|
- mov QWORD PTR [r13+136], r10
|
|
|
- adc rax, QWORD PTR [r14+144]
|
|
|
- mov r9, QWORD PTR [r8+152]
|
|
|
- mov QWORD PTR [r13+144], rax
|
|
|
- adc r9, QWORD PTR [r14+152]
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov QWORD PTR [r13+152], r9
|
|
|
- adc r10, QWORD PTR [r14+160]
|
|
|
- mov rax, QWORD PTR [r8+168]
|
|
|
- mov QWORD PTR [r13+160], r10
|
|
|
- adc rax, QWORD PTR [r14+168]
|
|
|
- mov r9, QWORD PTR [r8+176]
|
|
|
- mov QWORD PTR [r13+168], rax
|
|
|
- adc r9, QWORD PTR [r14+176]
|
|
|
- mov r10, QWORD PTR [r8+184]
|
|
|
- mov QWORD PTR [r13+176], r9
|
|
|
- adc r10, QWORD PTR [r14+184]
|
|
|
- mov rax, QWORD PTR [r8+192]
|
|
|
- mov QWORD PTR [r13+184], r10
|
|
|
- adc rax, QWORD PTR [r14+192]
|
|
|
- mov r9, QWORD PTR [r8+200]
|
|
|
- mov QWORD PTR [r13+192], rax
|
|
|
- adc r9, QWORD PTR [r14+200]
|
|
|
- mov r10, QWORD PTR [r8+208]
|
|
|
- mov QWORD PTR [r13+200], r9
|
|
|
- adc r10, QWORD PTR [r14+208]
|
|
|
- mov rax, QWORD PTR [r8+216]
|
|
|
- mov QWORD PTR [r13+208], r10
|
|
|
- adc rax, QWORD PTR [r14+216]
|
|
|
- mov r9, QWORD PTR [r8+224]
|
|
|
- mov QWORD PTR [r13+216], rax
|
|
|
- adc r9, QWORD PTR [r14+224]
|
|
|
- mov r10, QWORD PTR [r8+232]
|
|
|
- mov QWORD PTR [r13+224], r9
|
|
|
- adc r10, QWORD PTR [r14+232]
|
|
|
- mov rax, QWORD PTR [r8+240]
|
|
|
- mov QWORD PTR [r13+232], r10
|
|
|
- adc rax, QWORD PTR [r14+240]
|
|
|
- mov r9, QWORD PTR [r8+248]
|
|
|
- mov QWORD PTR [r13+240], rax
|
|
|
- adc r9, QWORD PTR [r14+248]
|
|
|
- mov QWORD PTR [r13+248], r9
|
|
|
- adc rdi, 0
|
|
|
- mov QWORD PTR [rsp+1568], rdi
|
|
|
- mov r8, r13
|
|
|
- mov rdx, r12
|
|
|
- mov rcx, rsp
|
|
|
- call sp_2048_mul_avx2_32
|
|
|
- mov r8, QWORD PTR [rsp+1552]
|
|
|
- mov rdx, QWORD PTR [rsp+1544]
|
|
|
- lea rcx, QWORD PTR [rsp+512]
|
|
|
- add r8, 256
|
|
|
- add rdx, 256
|
|
|
- call sp_2048_mul_avx2_32
|
|
|
- mov r8, QWORD PTR [rsp+1552]
|
|
|
- mov rdx, QWORD PTR [rsp+1544]
|
|
|
- mov rcx, QWORD PTR [rsp+1536]
|
|
|
- call sp_2048_mul_avx2_32
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, QWORD PTR [rsp+1552]
|
|
|
- mov rdx, QWORD PTR [rsp+1544]
|
|
|
- mov rcx, QWORD PTR [rsp+1536]
|
|
|
-ENDIF
|
|
|
- mov r15, QWORD PTR [rsp+1560]
|
|
|
- mov rdi, QWORD PTR [rsp+1568]
|
|
|
- mov rsi, QWORD PTR [rsp+1536]
|
|
|
- mov r11, r15
|
|
|
- lea r12, QWORD PTR [rsp+1024]
|
|
|
- lea r13, QWORD PTR [rsp+1280]
|
|
|
- and r11, rdi
|
|
|
- neg r15
|
|
|
- neg rdi
|
|
|
- add rsi, 512
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [r13]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- add rax, r9
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [r13+8]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [r13+16]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [r13+24]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [r13+32]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [r13+40]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [r13+48]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [r13+56]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [r13+64]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [r13+72]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [r13+80]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [r13+88]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [r13+96]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [r13+104]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [r13+112]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [r13+120]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [r13+128]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [r13+136]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [r13+144]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [r13+152]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [r13+160]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [r13+168]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [r13+176]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [r13+184]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [r13+192]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov r10, QWORD PTR [r13+200]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov rax, QWORD PTR [r13+208]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [r13+216]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov r10, QWORD PTR [r13+224]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, r10
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov rax, QWORD PTR [r13+232]
|
|
|
- pext r10, r10, rdi
|
|
|
- pext rax, rax, r15
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, rax
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [r13+240]
|
|
|
- pext rax, rax, rdi
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, r9
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov r10, QWORD PTR [r13+248]
|
|
|
- pext r9, r9, rdi
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, r10
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r11, 0
|
|
|
- lea r13, QWORD PTR [rsp+512]
|
|
|
- mov r12, rsp
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [r13+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [r13+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [r13+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [r13+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [r13+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [r13+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [r13+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [r13+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [r13+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [r13+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [r13+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [r13+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [r13+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [r13+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [r13+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [r13+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [r13+376]
|
|
|
- mov rax, QWORD PTR [r12+384]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb rax, QWORD PTR [r13+384]
|
|
|
- mov r9, QWORD PTR [r12+392]
|
|
|
- mov QWORD PTR [r12+384], rax
|
|
|
- sbb r9, QWORD PTR [r13+392]
|
|
|
- mov r10, QWORD PTR [r12+400]
|
|
|
- mov QWORD PTR [r12+392], r9
|
|
|
- sbb r10, QWORD PTR [r13+400]
|
|
|
- mov rax, QWORD PTR [r12+408]
|
|
|
- mov QWORD PTR [r12+400], r10
|
|
|
- sbb rax, QWORD PTR [r13+408]
|
|
|
- mov r9, QWORD PTR [r12+416]
|
|
|
- mov QWORD PTR [r12+408], rax
|
|
|
- sbb r9, QWORD PTR [r13+416]
|
|
|
- mov r10, QWORD PTR [r12+424]
|
|
|
- mov QWORD PTR [r12+416], r9
|
|
|
- sbb r10, QWORD PTR [r13+424]
|
|
|
- mov rax, QWORD PTR [r12+432]
|
|
|
- mov QWORD PTR [r12+424], r10
|
|
|
- sbb rax, QWORD PTR [r13+432]
|
|
|
- mov r9, QWORD PTR [r12+440]
|
|
|
- mov QWORD PTR [r12+432], rax
|
|
|
- sbb r9, QWORD PTR [r13+440]
|
|
|
- mov r10, QWORD PTR [r12+448]
|
|
|
- mov QWORD PTR [r12+440], r9
|
|
|
- sbb r10, QWORD PTR [r13+448]
|
|
|
- mov rax, QWORD PTR [r12+456]
|
|
|
- mov QWORD PTR [r12+448], r10
|
|
|
- sbb rax, QWORD PTR [r13+456]
|
|
|
- mov r9, QWORD PTR [r12+464]
|
|
|
- mov QWORD PTR [r12+456], rax
|
|
|
- sbb r9, QWORD PTR [r13+464]
|
|
|
- mov r10, QWORD PTR [r12+472]
|
|
|
- mov QWORD PTR [r12+464], r9
|
|
|
- sbb r10, QWORD PTR [r13+472]
|
|
|
- mov rax, QWORD PTR [r12+480]
|
|
|
- mov QWORD PTR [r12+472], r10
|
|
|
- sbb rax, QWORD PTR [r13+480]
|
|
|
- mov r9, QWORD PTR [r12+488]
|
|
|
- mov QWORD PTR [r12+480], rax
|
|
|
- sbb r9, QWORD PTR [r13+488]
|
|
|
- mov r10, QWORD PTR [r12+496]
|
|
|
- mov QWORD PTR [r12+488], r9
|
|
|
- sbb r10, QWORD PTR [r13+496]
|
|
|
- mov rax, QWORD PTR [r12+504]
|
|
|
- mov QWORD PTR [r12+496], r10
|
|
|
- sbb rax, QWORD PTR [r13+504]
|
|
|
- mov QWORD PTR [r12+504], rax
|
|
|
- sbb r11, 0
|
|
|
- mov rax, QWORD PTR [r12]
|
|
|
- sub rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [r12+8]
|
|
|
- mov QWORD PTR [r12], rax
|
|
|
- sbb r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [r12+16]
|
|
|
- mov QWORD PTR [r12+8], r9
|
|
|
- sbb r10, QWORD PTR [rcx+16]
|
|
|
- mov rax, QWORD PTR [r12+24]
|
|
|
- mov QWORD PTR [r12+16], r10
|
|
|
- sbb rax, QWORD PTR [rcx+24]
|
|
|
- mov r9, QWORD PTR [r12+32]
|
|
|
- mov QWORD PTR [r12+24], rax
|
|
|
- sbb r9, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [r12+40]
|
|
|
- mov QWORD PTR [r12+32], r9
|
|
|
- sbb r10, QWORD PTR [rcx+40]
|
|
|
- mov rax, QWORD PTR [r12+48]
|
|
|
- mov QWORD PTR [r12+40], r10
|
|
|
- sbb rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [r12+56]
|
|
|
- mov QWORD PTR [r12+48], rax
|
|
|
- sbb r9, QWORD PTR [rcx+56]
|
|
|
- mov r10, QWORD PTR [r12+64]
|
|
|
- mov QWORD PTR [r12+56], r9
|
|
|
- sbb r10, QWORD PTR [rcx+64]
|
|
|
- mov rax, QWORD PTR [r12+72]
|
|
|
- mov QWORD PTR [r12+64], r10
|
|
|
- sbb rax, QWORD PTR [rcx+72]
|
|
|
- mov r9, QWORD PTR [r12+80]
|
|
|
- mov QWORD PTR [r12+72], rax
|
|
|
- sbb r9, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [r12+88]
|
|
|
- mov QWORD PTR [r12+80], r9
|
|
|
- sbb r10, QWORD PTR [rcx+88]
|
|
|
- mov rax, QWORD PTR [r12+96]
|
|
|
- mov QWORD PTR [r12+88], r10
|
|
|
- sbb rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [r12+104]
|
|
|
- mov QWORD PTR [r12+96], rax
|
|
|
- sbb r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [r12+112]
|
|
|
- mov QWORD PTR [r12+104], r9
|
|
|
- sbb r10, QWORD PTR [rcx+112]
|
|
|
- mov rax, QWORD PTR [r12+120]
|
|
|
- mov QWORD PTR [r12+112], r10
|
|
|
- sbb rax, QWORD PTR [rcx+120]
|
|
|
- mov r9, QWORD PTR [r12+128]
|
|
|
- mov QWORD PTR [r12+120], rax
|
|
|
- sbb r9, QWORD PTR [rcx+128]
|
|
|
- mov r10, QWORD PTR [r12+136]
|
|
|
- mov QWORD PTR [r12+128], r9
|
|
|
- sbb r10, QWORD PTR [rcx+136]
|
|
|
- mov rax, QWORD PTR [r12+144]
|
|
|
- mov QWORD PTR [r12+136], r10
|
|
|
- sbb rax, QWORD PTR [rcx+144]
|
|
|
- mov r9, QWORD PTR [r12+152]
|
|
|
- mov QWORD PTR [r12+144], rax
|
|
|
- sbb r9, QWORD PTR [rcx+152]
|
|
|
- mov r10, QWORD PTR [r12+160]
|
|
|
- mov QWORD PTR [r12+152], r9
|
|
|
- sbb r10, QWORD PTR [rcx+160]
|
|
|
- mov rax, QWORD PTR [r12+168]
|
|
|
- mov QWORD PTR [r12+160], r10
|
|
|
- sbb rax, QWORD PTR [rcx+168]
|
|
|
- mov r9, QWORD PTR [r12+176]
|
|
|
- mov QWORD PTR [r12+168], rax
|
|
|
- sbb r9, QWORD PTR [rcx+176]
|
|
|
- mov r10, QWORD PTR [r12+184]
|
|
|
- mov QWORD PTR [r12+176], r9
|
|
|
- sbb r10, QWORD PTR [rcx+184]
|
|
|
- mov rax, QWORD PTR [r12+192]
|
|
|
- mov QWORD PTR [r12+184], r10
|
|
|
- sbb rax, QWORD PTR [rcx+192]
|
|
|
- mov r9, QWORD PTR [r12+200]
|
|
|
- mov QWORD PTR [r12+192], rax
|
|
|
- sbb r9, QWORD PTR [rcx+200]
|
|
|
- mov r10, QWORD PTR [r12+208]
|
|
|
- mov QWORD PTR [r12+200], r9
|
|
|
- sbb r10, QWORD PTR [rcx+208]
|
|
|
- mov rax, QWORD PTR [r12+216]
|
|
|
- mov QWORD PTR [r12+208], r10
|
|
|
- sbb rax, QWORD PTR [rcx+216]
|
|
|
- mov r9, QWORD PTR [r12+224]
|
|
|
- mov QWORD PTR [r12+216], rax
|
|
|
- sbb r9, QWORD PTR [rcx+224]
|
|
|
- mov r10, QWORD PTR [r12+232]
|
|
|
- mov QWORD PTR [r12+224], r9
|
|
|
- sbb r10, QWORD PTR [rcx+232]
|
|
|
- mov rax, QWORD PTR [r12+240]
|
|
|
- mov QWORD PTR [r12+232], r10
|
|
|
- sbb rax, QWORD PTR [rcx+240]
|
|
|
- mov r9, QWORD PTR [r12+248]
|
|
|
- mov QWORD PTR [r12+240], rax
|
|
|
- sbb r9, QWORD PTR [rcx+248]
|
|
|
- mov r10, QWORD PTR [r12+256]
|
|
|
- mov QWORD PTR [r12+248], r9
|
|
|
- sbb r10, QWORD PTR [rcx+256]
|
|
|
- mov rax, QWORD PTR [r12+264]
|
|
|
- mov QWORD PTR [r12+256], r10
|
|
|
- sbb rax, QWORD PTR [rcx+264]
|
|
|
- mov r9, QWORD PTR [r12+272]
|
|
|
- mov QWORD PTR [r12+264], rax
|
|
|
- sbb r9, QWORD PTR [rcx+272]
|
|
|
- mov r10, QWORD PTR [r12+280]
|
|
|
- mov QWORD PTR [r12+272], r9
|
|
|
- sbb r10, QWORD PTR [rcx+280]
|
|
|
- mov rax, QWORD PTR [r12+288]
|
|
|
- mov QWORD PTR [r12+280], r10
|
|
|
- sbb rax, QWORD PTR [rcx+288]
|
|
|
- mov r9, QWORD PTR [r12+296]
|
|
|
- mov QWORD PTR [r12+288], rax
|
|
|
- sbb r9, QWORD PTR [rcx+296]
|
|
|
- mov r10, QWORD PTR [r12+304]
|
|
|
- mov QWORD PTR [r12+296], r9
|
|
|
- sbb r10, QWORD PTR [rcx+304]
|
|
|
- mov rax, QWORD PTR [r12+312]
|
|
|
- mov QWORD PTR [r12+304], r10
|
|
|
- sbb rax, QWORD PTR [rcx+312]
|
|
|
- mov r9, QWORD PTR [r12+320]
|
|
|
- mov QWORD PTR [r12+312], rax
|
|
|
- sbb r9, QWORD PTR [rcx+320]
|
|
|
- mov r10, QWORD PTR [r12+328]
|
|
|
- mov QWORD PTR [r12+320], r9
|
|
|
- sbb r10, QWORD PTR [rcx+328]
|
|
|
- mov rax, QWORD PTR [r12+336]
|
|
|
- mov QWORD PTR [r12+328], r10
|
|
|
- sbb rax, QWORD PTR [rcx+336]
|
|
|
- mov r9, QWORD PTR [r12+344]
|
|
|
- mov QWORD PTR [r12+336], rax
|
|
|
- sbb r9, QWORD PTR [rcx+344]
|
|
|
- mov r10, QWORD PTR [r12+352]
|
|
|
- mov QWORD PTR [r12+344], r9
|
|
|
- sbb r10, QWORD PTR [rcx+352]
|
|
|
- mov rax, QWORD PTR [r12+360]
|
|
|
- mov QWORD PTR [r12+352], r10
|
|
|
- sbb rax, QWORD PTR [rcx+360]
|
|
|
- mov r9, QWORD PTR [r12+368]
|
|
|
- mov QWORD PTR [r12+360], rax
|
|
|
- sbb r9, QWORD PTR [rcx+368]
|
|
|
- mov r10, QWORD PTR [r12+376]
|
|
|
- mov QWORD PTR [r12+368], r9
|
|
|
- sbb r10, QWORD PTR [rcx+376]
|
|
|
- mov rax, QWORD PTR [r12+384]
|
|
|
- mov QWORD PTR [r12+376], r10
|
|
|
- sbb rax, QWORD PTR [rcx+384]
|
|
|
- mov r9, QWORD PTR [r12+392]
|
|
|
- mov QWORD PTR [r12+384], rax
|
|
|
- sbb r9, QWORD PTR [rcx+392]
|
|
|
- mov r10, QWORD PTR [r12+400]
|
|
|
- mov QWORD PTR [r12+392], r9
|
|
|
- sbb r10, QWORD PTR [rcx+400]
|
|
|
- mov rax, QWORD PTR [r12+408]
|
|
|
- mov QWORD PTR [r12+400], r10
|
|
|
- sbb rax, QWORD PTR [rcx+408]
|
|
|
- mov r9, QWORD PTR [r12+416]
|
|
|
- mov QWORD PTR [r12+408], rax
|
|
|
- sbb r9, QWORD PTR [rcx+416]
|
|
|
- mov r10, QWORD PTR [r12+424]
|
|
|
- mov QWORD PTR [r12+416], r9
|
|
|
- sbb r10, QWORD PTR [rcx+424]
|
|
|
- mov rax, QWORD PTR [r12+432]
|
|
|
- mov QWORD PTR [r12+424], r10
|
|
|
- sbb rax, QWORD PTR [rcx+432]
|
|
|
- mov r9, QWORD PTR [r12+440]
|
|
|
- mov QWORD PTR [r12+432], rax
|
|
|
- sbb r9, QWORD PTR [rcx+440]
|
|
|
- mov r10, QWORD PTR [r12+448]
|
|
|
- mov QWORD PTR [r12+440], r9
|
|
|
- sbb r10, QWORD PTR [rcx+448]
|
|
|
- mov rax, QWORD PTR [r12+456]
|
|
|
- mov QWORD PTR [r12+448], r10
|
|
|
- sbb rax, QWORD PTR [rcx+456]
|
|
|
- mov r9, QWORD PTR [r12+464]
|
|
|
- mov QWORD PTR [r12+456], rax
|
|
|
- sbb r9, QWORD PTR [rcx+464]
|
|
|
- mov r10, QWORD PTR [r12+472]
|
|
|
- mov QWORD PTR [r12+464], r9
|
|
|
- sbb r10, QWORD PTR [rcx+472]
|
|
|
- mov rax, QWORD PTR [r12+480]
|
|
|
- mov QWORD PTR [r12+472], r10
|
|
|
- sbb rax, QWORD PTR [rcx+480]
|
|
|
- mov r9, QWORD PTR [r12+488]
|
|
|
- mov QWORD PTR [r12+480], rax
|
|
|
- sbb r9, QWORD PTR [rcx+488]
|
|
|
- mov r10, QWORD PTR [r12+496]
|
|
|
- mov QWORD PTR [r12+488], r9
|
|
|
- sbb r10, QWORD PTR [rcx+496]
|
|
|
- mov rax, QWORD PTR [r12+504]
|
|
|
- mov QWORD PTR [r12+496], r10
|
|
|
- sbb rax, QWORD PTR [rcx+504]
|
|
|
- mov QWORD PTR [r12+504], rax
|
|
|
- sbb r11, 0
|
|
|
- sub rsi, 256
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r12]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r12+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r12+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r12+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r12+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r12+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r12+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r12+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r12+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r12+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r12+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r12+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r12+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r12+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r12+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r12+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r12+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r12+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r12+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r12+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r12+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r12+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r12+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r12+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r12+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r12+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r12+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r12+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r12+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r12+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r12+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r12+248]
|
|
|
- mov r10, QWORD PTR [rsi+256]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r10, QWORD PTR [r12+256]
|
|
|
- mov rax, QWORD PTR [rsi+264]
|
|
|
- mov QWORD PTR [rsi+256], r10
|
|
|
- adc rax, QWORD PTR [r12+264]
|
|
|
- mov r9, QWORD PTR [rsi+272]
|
|
|
- mov QWORD PTR [rsi+264], rax
|
|
|
- adc r9, QWORD PTR [r12+272]
|
|
|
- mov r10, QWORD PTR [rsi+280]
|
|
|
- mov QWORD PTR [rsi+272], r9
|
|
|
- adc r10, QWORD PTR [r12+280]
|
|
|
- mov rax, QWORD PTR [rsi+288]
|
|
|
- mov QWORD PTR [rsi+280], r10
|
|
|
- adc rax, QWORD PTR [r12+288]
|
|
|
- mov r9, QWORD PTR [rsi+296]
|
|
|
- mov QWORD PTR [rsi+288], rax
|
|
|
- adc r9, QWORD PTR [r12+296]
|
|
|
- mov r10, QWORD PTR [rsi+304]
|
|
|
- mov QWORD PTR [rsi+296], r9
|
|
|
- adc r10, QWORD PTR [r12+304]
|
|
|
- mov rax, QWORD PTR [rsi+312]
|
|
|
- mov QWORD PTR [rsi+304], r10
|
|
|
- adc rax, QWORD PTR [r12+312]
|
|
|
- mov r9, QWORD PTR [rsi+320]
|
|
|
- mov QWORD PTR [rsi+312], rax
|
|
|
- adc r9, QWORD PTR [r12+320]
|
|
|
- mov r10, QWORD PTR [rsi+328]
|
|
|
- mov QWORD PTR [rsi+320], r9
|
|
|
- adc r10, QWORD PTR [r12+328]
|
|
|
- mov rax, QWORD PTR [rsi+336]
|
|
|
- mov QWORD PTR [rsi+328], r10
|
|
|
- adc rax, QWORD PTR [r12+336]
|
|
|
- mov r9, QWORD PTR [rsi+344]
|
|
|
- mov QWORD PTR [rsi+336], rax
|
|
|
- adc r9, QWORD PTR [r12+344]
|
|
|
- mov r10, QWORD PTR [rsi+352]
|
|
|
- mov QWORD PTR [rsi+344], r9
|
|
|
- adc r10, QWORD PTR [r12+352]
|
|
|
- mov rax, QWORD PTR [rsi+360]
|
|
|
- mov QWORD PTR [rsi+352], r10
|
|
|
- adc rax, QWORD PTR [r12+360]
|
|
|
- mov r9, QWORD PTR [rsi+368]
|
|
|
- mov QWORD PTR [rsi+360], rax
|
|
|
- adc r9, QWORD PTR [r12+368]
|
|
|
- mov r10, QWORD PTR [rsi+376]
|
|
|
- mov QWORD PTR [rsi+368], r9
|
|
|
- adc r10, QWORD PTR [r12+376]
|
|
|
- mov rax, QWORD PTR [rsi+384]
|
|
|
- mov QWORD PTR [rsi+376], r10
|
|
|
- adc rax, QWORD PTR [r12+384]
|
|
|
- mov r9, QWORD PTR [rsi+392]
|
|
|
- mov QWORD PTR [rsi+384], rax
|
|
|
- adc r9, QWORD PTR [r12+392]
|
|
|
- mov r10, QWORD PTR [rsi+400]
|
|
|
- mov QWORD PTR [rsi+392], r9
|
|
|
- adc r10, QWORD PTR [r12+400]
|
|
|
- mov rax, QWORD PTR [rsi+408]
|
|
|
- mov QWORD PTR [rsi+400], r10
|
|
|
- adc rax, QWORD PTR [r12+408]
|
|
|
- mov r9, QWORD PTR [rsi+416]
|
|
|
- mov QWORD PTR [rsi+408], rax
|
|
|
- adc r9, QWORD PTR [r12+416]
|
|
|
- mov r10, QWORD PTR [rsi+424]
|
|
|
- mov QWORD PTR [rsi+416], r9
|
|
|
- adc r10, QWORD PTR [r12+424]
|
|
|
- mov rax, QWORD PTR [rsi+432]
|
|
|
- mov QWORD PTR [rsi+424], r10
|
|
|
- adc rax, QWORD PTR [r12+432]
|
|
|
- mov r9, QWORD PTR [rsi+440]
|
|
|
- mov QWORD PTR [rsi+432], rax
|
|
|
- adc r9, QWORD PTR [r12+440]
|
|
|
- mov r10, QWORD PTR [rsi+448]
|
|
|
- mov QWORD PTR [rsi+440], r9
|
|
|
- adc r10, QWORD PTR [r12+448]
|
|
|
- mov rax, QWORD PTR [rsi+456]
|
|
|
- mov QWORD PTR [rsi+448], r10
|
|
|
- adc rax, QWORD PTR [r12+456]
|
|
|
- mov r9, QWORD PTR [rsi+464]
|
|
|
- mov QWORD PTR [rsi+456], rax
|
|
|
- adc r9, QWORD PTR [r12+464]
|
|
|
- mov r10, QWORD PTR [rsi+472]
|
|
|
- mov QWORD PTR [rsi+464], r9
|
|
|
- adc r10, QWORD PTR [r12+472]
|
|
|
- mov rax, QWORD PTR [rsi+480]
|
|
|
- mov QWORD PTR [rsi+472], r10
|
|
|
- adc rax, QWORD PTR [r12+480]
|
|
|
- mov r9, QWORD PTR [rsi+488]
|
|
|
- mov QWORD PTR [rsi+480], rax
|
|
|
- adc r9, QWORD PTR [r12+488]
|
|
|
- mov r10, QWORD PTR [rsi+496]
|
|
|
- mov QWORD PTR [rsi+488], r9
|
|
|
- adc r10, QWORD PTR [r12+496]
|
|
|
- mov rax, QWORD PTR [rsi+504]
|
|
|
- mov QWORD PTR [rsi+496], r10
|
|
|
- adc rax, QWORD PTR [r12+504]
|
|
|
- mov QWORD PTR [rsi+504], rax
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+768], r11
|
|
|
- add rsi, 256
|
|
|
- ; Add
|
|
|
- mov rax, QWORD PTR [rsi]
|
|
|
- add rax, QWORD PTR [r13]
|
|
|
- mov r9, QWORD PTR [rsi+8]
|
|
|
- mov QWORD PTR [rsi], rax
|
|
|
- adc r9, QWORD PTR [r13+8]
|
|
|
- mov r10, QWORD PTR [rsi+16]
|
|
|
- mov QWORD PTR [rsi+8], r9
|
|
|
- adc r10, QWORD PTR [r13+16]
|
|
|
- mov rax, QWORD PTR [rsi+24]
|
|
|
- mov QWORD PTR [rsi+16], r10
|
|
|
- adc rax, QWORD PTR [r13+24]
|
|
|
- mov r9, QWORD PTR [rsi+32]
|
|
|
- mov QWORD PTR [rsi+24], rax
|
|
|
- adc r9, QWORD PTR [r13+32]
|
|
|
- mov r10, QWORD PTR [rsi+40]
|
|
|
- mov QWORD PTR [rsi+32], r9
|
|
|
- adc r10, QWORD PTR [r13+40]
|
|
|
- mov rax, QWORD PTR [rsi+48]
|
|
|
- mov QWORD PTR [rsi+40], r10
|
|
|
- adc rax, QWORD PTR [r13+48]
|
|
|
- mov r9, QWORD PTR [rsi+56]
|
|
|
- mov QWORD PTR [rsi+48], rax
|
|
|
- adc r9, QWORD PTR [r13+56]
|
|
|
- mov r10, QWORD PTR [rsi+64]
|
|
|
- mov QWORD PTR [rsi+56], r9
|
|
|
- adc r10, QWORD PTR [r13+64]
|
|
|
- mov rax, QWORD PTR [rsi+72]
|
|
|
- mov QWORD PTR [rsi+64], r10
|
|
|
- adc rax, QWORD PTR [r13+72]
|
|
|
- mov r9, QWORD PTR [rsi+80]
|
|
|
- mov QWORD PTR [rsi+72], rax
|
|
|
- adc r9, QWORD PTR [r13+80]
|
|
|
- mov r10, QWORD PTR [rsi+88]
|
|
|
- mov QWORD PTR [rsi+80], r9
|
|
|
- adc r10, QWORD PTR [r13+88]
|
|
|
- mov rax, QWORD PTR [rsi+96]
|
|
|
- mov QWORD PTR [rsi+88], r10
|
|
|
- adc rax, QWORD PTR [r13+96]
|
|
|
- mov r9, QWORD PTR [rsi+104]
|
|
|
- mov QWORD PTR [rsi+96], rax
|
|
|
- adc r9, QWORD PTR [r13+104]
|
|
|
- mov r10, QWORD PTR [rsi+112]
|
|
|
- mov QWORD PTR [rsi+104], r9
|
|
|
- adc r10, QWORD PTR [r13+112]
|
|
|
- mov rax, QWORD PTR [rsi+120]
|
|
|
- mov QWORD PTR [rsi+112], r10
|
|
|
- adc rax, QWORD PTR [r13+120]
|
|
|
- mov r9, QWORD PTR [rsi+128]
|
|
|
- mov QWORD PTR [rsi+120], rax
|
|
|
- adc r9, QWORD PTR [r13+128]
|
|
|
- mov r10, QWORD PTR [rsi+136]
|
|
|
- mov QWORD PTR [rsi+128], r9
|
|
|
- adc r10, QWORD PTR [r13+136]
|
|
|
- mov rax, QWORD PTR [rsi+144]
|
|
|
- mov QWORD PTR [rsi+136], r10
|
|
|
- adc rax, QWORD PTR [r13+144]
|
|
|
- mov r9, QWORD PTR [rsi+152]
|
|
|
- mov QWORD PTR [rsi+144], rax
|
|
|
- adc r9, QWORD PTR [r13+152]
|
|
|
- mov r10, QWORD PTR [rsi+160]
|
|
|
- mov QWORD PTR [rsi+152], r9
|
|
|
- adc r10, QWORD PTR [r13+160]
|
|
|
- mov rax, QWORD PTR [rsi+168]
|
|
|
- mov QWORD PTR [rsi+160], r10
|
|
|
- adc rax, QWORD PTR [r13+168]
|
|
|
- mov r9, QWORD PTR [rsi+176]
|
|
|
- mov QWORD PTR [rsi+168], rax
|
|
|
- adc r9, QWORD PTR [r13+176]
|
|
|
- mov r10, QWORD PTR [rsi+184]
|
|
|
- mov QWORD PTR [rsi+176], r9
|
|
|
- adc r10, QWORD PTR [r13+184]
|
|
|
- mov rax, QWORD PTR [rsi+192]
|
|
|
- mov QWORD PTR [rsi+184], r10
|
|
|
- adc rax, QWORD PTR [r13+192]
|
|
|
- mov r9, QWORD PTR [rsi+200]
|
|
|
- mov QWORD PTR [rsi+192], rax
|
|
|
- adc r9, QWORD PTR [r13+200]
|
|
|
- mov r10, QWORD PTR [rsi+208]
|
|
|
- mov QWORD PTR [rsi+200], r9
|
|
|
- adc r10, QWORD PTR [r13+208]
|
|
|
- mov rax, QWORD PTR [rsi+216]
|
|
|
- mov QWORD PTR [rsi+208], r10
|
|
|
- adc rax, QWORD PTR [r13+216]
|
|
|
- mov r9, QWORD PTR [rsi+224]
|
|
|
- mov QWORD PTR [rsi+216], rax
|
|
|
- adc r9, QWORD PTR [r13+224]
|
|
|
- mov r10, QWORD PTR [rsi+232]
|
|
|
- mov QWORD PTR [rsi+224], r9
|
|
|
- adc r10, QWORD PTR [r13+232]
|
|
|
- mov rax, QWORD PTR [rsi+240]
|
|
|
- mov QWORD PTR [rsi+232], r10
|
|
|
- adc rax, QWORD PTR [r13+240]
|
|
|
- mov r9, QWORD PTR [rsi+248]
|
|
|
- mov QWORD PTR [rsi+240], rax
|
|
|
- adc r9, QWORD PTR [r13+248]
|
|
|
- mov r10, QWORD PTR [rsi+256]
|
|
|
- mov QWORD PTR [rsi+248], r9
|
|
|
- adc r10, QWORD PTR [r13+256]
|
|
|
- mov QWORD PTR [rsi+256], r10
|
|
|
- ; Add to zero
|
|
|
- mov rax, QWORD PTR [r13+264]
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+272]
|
|
|
- mov QWORD PTR [rsi+264], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+280]
|
|
|
- mov QWORD PTR [rsi+272], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+288]
|
|
|
- mov QWORD PTR [rsi+280], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+296]
|
|
|
- mov QWORD PTR [rsi+288], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+304]
|
|
|
- mov QWORD PTR [rsi+296], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+312]
|
|
|
- mov QWORD PTR [rsi+304], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+320]
|
|
|
- mov QWORD PTR [rsi+312], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+328]
|
|
|
- mov QWORD PTR [rsi+320], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+336]
|
|
|
- mov QWORD PTR [rsi+328], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+344]
|
|
|
- mov QWORD PTR [rsi+336], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+352]
|
|
|
- mov QWORD PTR [rsi+344], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+360]
|
|
|
- mov QWORD PTR [rsi+352], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+368]
|
|
|
- mov QWORD PTR [rsi+360], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+376]
|
|
|
- mov QWORD PTR [rsi+368], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+384]
|
|
|
- mov QWORD PTR [rsi+376], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+392]
|
|
|
- mov QWORD PTR [rsi+384], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+400]
|
|
|
- mov QWORD PTR [rsi+392], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+408]
|
|
|
- mov QWORD PTR [rsi+400], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+416]
|
|
|
- mov QWORD PTR [rsi+408], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+424]
|
|
|
- mov QWORD PTR [rsi+416], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+432]
|
|
|
- mov QWORD PTR [rsi+424], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+440]
|
|
|
- mov QWORD PTR [rsi+432], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+448]
|
|
|
- mov QWORD PTR [rsi+440], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+456]
|
|
|
- mov QWORD PTR [rsi+448], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+464]
|
|
|
- mov QWORD PTR [rsi+456], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+472]
|
|
|
- mov QWORD PTR [rsi+464], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+480]
|
|
|
- mov QWORD PTR [rsi+472], r10
|
|
|
- adc rax, 0
|
|
|
- mov r9, QWORD PTR [r13+488]
|
|
|
- mov QWORD PTR [rsi+480], rax
|
|
|
- adc r9, 0
|
|
|
- mov r10, QWORD PTR [r13+496]
|
|
|
- mov QWORD PTR [rsi+488], r9
|
|
|
- adc r10, 0
|
|
|
- mov rax, QWORD PTR [r13+504]
|
|
|
- mov QWORD PTR [rsi+496], r10
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rsi+504], rax
|
|
|
- add rsp, 1576
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_mul_avx2_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * Karatsuba: ah^2, al^2, (al - ah)^2
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_sqr_64 PROC
|
|
|
- sub rsp, 528
|
|
|
- mov QWORD PTR [rsp+512], rcx
|
|
|
- mov QWORD PTR [rsp+520], rdx
|
|
|
- mov r9, 0
|
|
|
- mov r10, rsp
|
|
|
- lea r11, QWORD PTR [rdx+256]
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- sub rax, QWORD PTR [r11]
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- sbb r8, QWORD PTR [r11+8]
|
|
|
- mov rax, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- sbb rax, QWORD PTR [r11+16]
|
|
|
- mov r8, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- sbb r8, QWORD PTR [r11+24]
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- sbb rax, QWORD PTR [r11+32]
|
|
|
- mov r8, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- sbb r8, QWORD PTR [r11+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- sbb rax, QWORD PTR [r11+48]
|
|
|
- mov r8, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- sbb r8, QWORD PTR [r11+56]
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- sbb rax, QWORD PTR [r11+64]
|
|
|
- mov r8, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- sbb r8, QWORD PTR [r11+72]
|
|
|
- mov rax, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- sbb rax, QWORD PTR [r11+80]
|
|
|
- mov r8, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- sbb r8, QWORD PTR [r11+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- sbb rax, QWORD PTR [r11+96]
|
|
|
- mov r8, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- sbb r8, QWORD PTR [r11+104]
|
|
|
- mov rax, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- sbb rax, QWORD PTR [r11+112]
|
|
|
- mov r8, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- sbb r8, QWORD PTR [r11+120]
|
|
|
- mov rax, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- sbb rax, QWORD PTR [r11+128]
|
|
|
- mov r8, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [r10+128], rax
|
|
|
- sbb r8, QWORD PTR [r11+136]
|
|
|
- mov rax, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [r10+136], r8
|
|
|
- sbb rax, QWORD PTR [r11+144]
|
|
|
- mov r8, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [r10+144], rax
|
|
|
- sbb r8, QWORD PTR [r11+152]
|
|
|
- mov rax, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [r10+152], r8
|
|
|
- sbb rax, QWORD PTR [r11+160]
|
|
|
- mov r8, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [r10+160], rax
|
|
|
- sbb r8, QWORD PTR [r11+168]
|
|
|
- mov rax, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [r10+168], r8
|
|
|
- sbb rax, QWORD PTR [r11+176]
|
|
|
- mov r8, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r10+176], rax
|
|
|
- sbb r8, QWORD PTR [r11+184]
|
|
|
- mov rax, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [r10+184], r8
|
|
|
- sbb rax, QWORD PTR [r11+192]
|
|
|
- mov r8, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [r10+192], rax
|
|
|
- sbb r8, QWORD PTR [r11+200]
|
|
|
- mov rax, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [r10+200], r8
|
|
|
- sbb rax, QWORD PTR [r11+208]
|
|
|
- mov r8, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [r10+208], rax
|
|
|
- sbb r8, QWORD PTR [r11+216]
|
|
|
- mov rax, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [r10+216], r8
|
|
|
- sbb rax, QWORD PTR [r11+224]
|
|
|
- mov r8, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [r10+224], rax
|
|
|
- sbb r8, QWORD PTR [r11+232]
|
|
|
- mov rax, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [r10+232], r8
|
|
|
- sbb rax, QWORD PTR [r11+240]
|
|
|
- mov r8, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r10+240], rax
|
|
|
- sbb r8, QWORD PTR [r11+248]
|
|
|
- mov QWORD PTR [r10+248], r8
|
|
|
- sbb r9, 0
|
|
|
- ; Cond Negate
|
|
|
- mov rax, QWORD PTR [r10]
|
|
|
- mov r11, r9
|
|
|
- xor rax, r9
|
|
|
- neg r11
|
|
|
- sub rax, r9
|
|
|
- mov r8, QWORD PTR [r10+8]
|
|
|
- sbb r11, 0
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+16]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+24]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+32]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+40]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+56]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+64]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+72]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+80]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+88]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+104]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+112]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+120]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+128]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+136]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+128], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+144]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+136], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+152]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+144], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+160]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+152], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+168]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+160], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+176]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+168], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+184]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+176], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+192]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+184], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+200]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+192], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+208]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+200], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+216]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+208], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+224]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+216], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+232]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+224], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+240]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+232], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+248]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+240], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov QWORD PTR [r10+248], r8
|
|
|
- mov rdx, r10
|
|
|
- mov rcx, rsp
|
|
|
- call sp_2048_sqr_32
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- add rdx, 256
|
|
|
- add rcx, 512
|
|
|
- call sp_2048_sqr_32
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- call sp_2048_sqr_32
|
|
|
-IFDEF _WIN64
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
-ENDIF
|
|
|
- mov rdx, QWORD PTR [rsp+512]
|
|
|
- lea r10, QWORD PTR [rsp+256]
|
|
|
- add rdx, 768
|
|
|
- mov r9, 0
|
|
|
- mov r8, QWORD PTR [r10+-256]
|
|
|
- sub r8, QWORD PTR [rdx+-256]
|
|
|
- mov rax, QWORD PTR [r10+-248]
|
|
|
- mov QWORD PTR [r10+-256], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-248]
|
|
|
- mov r8, QWORD PTR [r10+-240]
|
|
|
- mov QWORD PTR [r10+-248], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-240]
|
|
|
- mov rax, QWORD PTR [r10+-232]
|
|
|
- mov QWORD PTR [r10+-240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-232]
|
|
|
- mov r8, QWORD PTR [r10+-224]
|
|
|
- mov QWORD PTR [r10+-232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-224]
|
|
|
- mov rax, QWORD PTR [r10+-216]
|
|
|
- mov QWORD PTR [r10+-224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-216]
|
|
|
- mov r8, QWORD PTR [r10+-208]
|
|
|
- mov QWORD PTR [r10+-216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-208]
|
|
|
- mov rax, QWORD PTR [r10+-200]
|
|
|
- mov QWORD PTR [r10+-208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-200]
|
|
|
- mov r8, QWORD PTR [r10+-192]
|
|
|
- mov QWORD PTR [r10+-200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-192]
|
|
|
- mov rax, QWORD PTR [r10+-184]
|
|
|
- mov QWORD PTR [r10+-192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-184]
|
|
|
- mov r8, QWORD PTR [r10+-176]
|
|
|
- mov QWORD PTR [r10+-184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-176]
|
|
|
- mov rax, QWORD PTR [r10+-168]
|
|
|
- mov QWORD PTR [r10+-176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-168]
|
|
|
- mov r8, QWORD PTR [r10+-160]
|
|
|
- mov QWORD PTR [r10+-168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-160]
|
|
|
- mov rax, QWORD PTR [r10+-152]
|
|
|
- mov QWORD PTR [r10+-160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-152]
|
|
|
- mov r8, QWORD PTR [r10+-144]
|
|
|
- mov QWORD PTR [r10+-152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-144]
|
|
|
- mov rax, QWORD PTR [r10+-136]
|
|
|
- mov QWORD PTR [r10+-144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-136]
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- mov QWORD PTR [r10+-136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [r10+128]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov rax, QWORD PTR [r10+136]
|
|
|
- mov QWORD PTR [r10+128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [r10+144]
|
|
|
- mov QWORD PTR [r10+136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov rax, QWORD PTR [r10+152]
|
|
|
- mov QWORD PTR [r10+144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [r10+160]
|
|
|
- mov QWORD PTR [r10+152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov QWORD PTR [r10+160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [r10+176]
|
|
|
- mov QWORD PTR [r10+168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov rax, QWORD PTR [r10+184]
|
|
|
- mov QWORD PTR [r10+176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [r10+192]
|
|
|
- mov QWORD PTR [r10+184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+192]
|
|
|
- mov rax, QWORD PTR [r10+200]
|
|
|
- mov QWORD PTR [r10+192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [r10+208]
|
|
|
- mov QWORD PTR [r10+200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+208]
|
|
|
- mov rax, QWORD PTR [r10+216]
|
|
|
- mov QWORD PTR [r10+208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [r10+224]
|
|
|
- mov QWORD PTR [r10+216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+224]
|
|
|
- mov rax, QWORD PTR [r10+232]
|
|
|
- mov QWORD PTR [r10+224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [r10+240]
|
|
|
- mov QWORD PTR [r10+232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+240]
|
|
|
- mov rax, QWORD PTR [r10+248]
|
|
|
- mov QWORD PTR [r10+240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r10+248], rax
|
|
|
- sbb r9, 0
|
|
|
- sub rdx, 512
|
|
|
- mov r8, QWORD PTR [r10+-256]
|
|
|
- sub r8, QWORD PTR [rdx+-256]
|
|
|
- mov rax, QWORD PTR [r10+-248]
|
|
|
- mov QWORD PTR [r10+-256], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-248]
|
|
|
- mov r8, QWORD PTR [r10+-240]
|
|
|
- mov QWORD PTR [r10+-248], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-240]
|
|
|
- mov rax, QWORD PTR [r10+-232]
|
|
|
- mov QWORD PTR [r10+-240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-232]
|
|
|
- mov r8, QWORD PTR [r10+-224]
|
|
|
- mov QWORD PTR [r10+-232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-224]
|
|
|
- mov rax, QWORD PTR [r10+-216]
|
|
|
- mov QWORD PTR [r10+-224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-216]
|
|
|
- mov r8, QWORD PTR [r10+-208]
|
|
|
- mov QWORD PTR [r10+-216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-208]
|
|
|
- mov rax, QWORD PTR [r10+-200]
|
|
|
- mov QWORD PTR [r10+-208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-200]
|
|
|
- mov r8, QWORD PTR [r10+-192]
|
|
|
- mov QWORD PTR [r10+-200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-192]
|
|
|
- mov rax, QWORD PTR [r10+-184]
|
|
|
- mov QWORD PTR [r10+-192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-184]
|
|
|
- mov r8, QWORD PTR [r10+-176]
|
|
|
- mov QWORD PTR [r10+-184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-176]
|
|
|
- mov rax, QWORD PTR [r10+-168]
|
|
|
- mov QWORD PTR [r10+-176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-168]
|
|
|
- mov r8, QWORD PTR [r10+-160]
|
|
|
- mov QWORD PTR [r10+-168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-160]
|
|
|
- mov rax, QWORD PTR [r10+-152]
|
|
|
- mov QWORD PTR [r10+-160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-152]
|
|
|
- mov r8, QWORD PTR [r10+-144]
|
|
|
- mov QWORD PTR [r10+-152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-144]
|
|
|
- mov rax, QWORD PTR [r10+-136]
|
|
|
- mov QWORD PTR [r10+-144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-136]
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- mov QWORD PTR [r10+-136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [r10+128]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov rax, QWORD PTR [r10+136]
|
|
|
- mov QWORD PTR [r10+128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [r10+144]
|
|
|
- mov QWORD PTR [r10+136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov rax, QWORD PTR [r10+152]
|
|
|
- mov QWORD PTR [r10+144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [r10+160]
|
|
|
- mov QWORD PTR [r10+152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov QWORD PTR [r10+160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [r10+176]
|
|
|
- mov QWORD PTR [r10+168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov rax, QWORD PTR [r10+184]
|
|
|
- mov QWORD PTR [r10+176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [r10+192]
|
|
|
- mov QWORD PTR [r10+184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+192]
|
|
|
- mov rax, QWORD PTR [r10+200]
|
|
|
- mov QWORD PTR [r10+192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [r10+208]
|
|
|
- mov QWORD PTR [r10+200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+208]
|
|
|
- mov rax, QWORD PTR [r10+216]
|
|
|
- mov QWORD PTR [r10+208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [r10+224]
|
|
|
- mov QWORD PTR [r10+216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+224]
|
|
|
- mov rax, QWORD PTR [r10+232]
|
|
|
- mov QWORD PTR [r10+224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [r10+240]
|
|
|
- mov QWORD PTR [r10+232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+240]
|
|
|
- mov rax, QWORD PTR [r10+248]
|
|
|
- mov QWORD PTR [r10+240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r10+248], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- neg r9
|
|
|
- add rcx, 512
|
|
|
- mov r8, QWORD PTR [rcx+-256]
|
|
|
- sub r8, QWORD PTR [r10+-256]
|
|
|
- mov rax, QWORD PTR [rcx+-248]
|
|
|
- mov QWORD PTR [rcx+-256], r8
|
|
|
- sbb rax, QWORD PTR [r10+-248]
|
|
|
- mov r8, QWORD PTR [rcx+-240]
|
|
|
- mov QWORD PTR [rcx+-248], rax
|
|
|
- sbb r8, QWORD PTR [r10+-240]
|
|
|
- mov rax, QWORD PTR [rcx+-232]
|
|
|
- mov QWORD PTR [rcx+-240], r8
|
|
|
- sbb rax, QWORD PTR [r10+-232]
|
|
|
- mov r8, QWORD PTR [rcx+-224]
|
|
|
- mov QWORD PTR [rcx+-232], rax
|
|
|
- sbb r8, QWORD PTR [r10+-224]
|
|
|
- mov rax, QWORD PTR [rcx+-216]
|
|
|
- mov QWORD PTR [rcx+-224], r8
|
|
|
- sbb rax, QWORD PTR [r10+-216]
|
|
|
- mov r8, QWORD PTR [rcx+-208]
|
|
|
- mov QWORD PTR [rcx+-216], rax
|
|
|
- sbb r8, QWORD PTR [r10+-208]
|
|
|
- mov rax, QWORD PTR [rcx+-200]
|
|
|
- mov QWORD PTR [rcx+-208], r8
|
|
|
- sbb rax, QWORD PTR [r10+-200]
|
|
|
- mov r8, QWORD PTR [rcx+-192]
|
|
|
- mov QWORD PTR [rcx+-200], rax
|
|
|
- sbb r8, QWORD PTR [r10+-192]
|
|
|
- mov rax, QWORD PTR [rcx+-184]
|
|
|
- mov QWORD PTR [rcx+-192], r8
|
|
|
- sbb rax, QWORD PTR [r10+-184]
|
|
|
- mov r8, QWORD PTR [rcx+-176]
|
|
|
- mov QWORD PTR [rcx+-184], rax
|
|
|
- sbb r8, QWORD PTR [r10+-176]
|
|
|
- mov rax, QWORD PTR [rcx+-168]
|
|
|
- mov QWORD PTR [rcx+-176], r8
|
|
|
- sbb rax, QWORD PTR [r10+-168]
|
|
|
- mov r8, QWORD PTR [rcx+-160]
|
|
|
- mov QWORD PTR [rcx+-168], rax
|
|
|
- sbb r8, QWORD PTR [r10+-160]
|
|
|
- mov rax, QWORD PTR [rcx+-152]
|
|
|
- mov QWORD PTR [rcx+-160], r8
|
|
|
- sbb rax, QWORD PTR [r10+-152]
|
|
|
- mov r8, QWORD PTR [rcx+-144]
|
|
|
- mov QWORD PTR [rcx+-152], rax
|
|
|
- sbb r8, QWORD PTR [r10+-144]
|
|
|
- mov rax, QWORD PTR [rcx+-136]
|
|
|
- mov QWORD PTR [rcx+-144], r8
|
|
|
- sbb rax, QWORD PTR [r10+-136]
|
|
|
- mov r8, QWORD PTR [rcx+-128]
|
|
|
- mov QWORD PTR [rcx+-136], rax
|
|
|
- sbb r8, QWORD PTR [r10+-128]
|
|
|
- mov rax, QWORD PTR [rcx+-120]
|
|
|
- mov QWORD PTR [rcx+-128], r8
|
|
|
- sbb rax, QWORD PTR [r10+-120]
|
|
|
- mov r8, QWORD PTR [rcx+-112]
|
|
|
- mov QWORD PTR [rcx+-120], rax
|
|
|
- sbb r8, QWORD PTR [r10+-112]
|
|
|
- mov rax, QWORD PTR [rcx+-104]
|
|
|
- mov QWORD PTR [rcx+-112], r8
|
|
|
- sbb rax, QWORD PTR [r10+-104]
|
|
|
- mov r8, QWORD PTR [rcx+-96]
|
|
|
- mov QWORD PTR [rcx+-104], rax
|
|
|
- sbb r8, QWORD PTR [r10+-96]
|
|
|
- mov rax, QWORD PTR [rcx+-88]
|
|
|
- mov QWORD PTR [rcx+-96], r8
|
|
|
- sbb rax, QWORD PTR [r10+-88]
|
|
|
- mov r8, QWORD PTR [rcx+-80]
|
|
|
- mov QWORD PTR [rcx+-88], rax
|
|
|
- sbb r8, QWORD PTR [r10+-80]
|
|
|
- mov rax, QWORD PTR [rcx+-72]
|
|
|
- mov QWORD PTR [rcx+-80], r8
|
|
|
- sbb rax, QWORD PTR [r10+-72]
|
|
|
- mov r8, QWORD PTR [rcx+-64]
|
|
|
- mov QWORD PTR [rcx+-72], rax
|
|
|
- sbb r8, QWORD PTR [r10+-64]
|
|
|
- mov rax, QWORD PTR [rcx+-56]
|
|
|
- mov QWORD PTR [rcx+-64], r8
|
|
|
- sbb rax, QWORD PTR [r10+-56]
|
|
|
- mov r8, QWORD PTR [rcx+-48]
|
|
|
- mov QWORD PTR [rcx+-56], rax
|
|
|
- sbb r8, QWORD PTR [r10+-48]
|
|
|
- mov rax, QWORD PTR [rcx+-40]
|
|
|
- mov QWORD PTR [rcx+-48], r8
|
|
|
- sbb rax, QWORD PTR [r10+-40]
|
|
|
- mov r8, QWORD PTR [rcx+-32]
|
|
|
- mov QWORD PTR [rcx+-40], rax
|
|
|
- sbb r8, QWORD PTR [r10+-32]
|
|
|
- mov rax, QWORD PTR [rcx+-24]
|
|
|
- mov QWORD PTR [rcx+-32], r8
|
|
|
- sbb rax, QWORD PTR [r10+-24]
|
|
|
- mov r8, QWORD PTR [rcx+-16]
|
|
|
- mov QWORD PTR [rcx+-24], rax
|
|
|
- sbb r8, QWORD PTR [r10+-16]
|
|
|
- mov rax, QWORD PTR [rcx+-8]
|
|
|
- mov QWORD PTR [rcx+-16], r8
|
|
|
- sbb rax, QWORD PTR [r10+-8]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- mov QWORD PTR [rcx+-8], rax
|
|
|
- sbb r8, QWORD PTR [r10]
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- sbb rax, QWORD PTR [r10+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- sbb r8, QWORD PTR [r10+16]
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb rax, QWORD PTR [r10+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- sbb r8, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb rax, QWORD PTR [r10+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- sbb r8, QWORD PTR [r10+48]
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb rax, QWORD PTR [r10+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- sbb r8, QWORD PTR [r10+64]
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb rax, QWORD PTR [r10+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- sbb r8, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb rax, QWORD PTR [r10+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- sbb r8, QWORD PTR [r10+96]
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb rax, QWORD PTR [r10+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- sbb r8, QWORD PTR [r10+112]
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb rax, QWORD PTR [r10+120]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- sbb r8, QWORD PTR [r10+128]
|
|
|
- mov rax, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- sbb rax, QWORD PTR [r10+136]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], rax
|
|
|
- sbb r8, QWORD PTR [r10+144]
|
|
|
- mov rax, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- sbb rax, QWORD PTR [r10+152]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], rax
|
|
|
- sbb r8, QWORD PTR [r10+160]
|
|
|
- mov rax, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- sbb rax, QWORD PTR [r10+168]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], rax
|
|
|
- sbb r8, QWORD PTR [r10+176]
|
|
|
- mov rax, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- sbb rax, QWORD PTR [r10+184]
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rcx+184], rax
|
|
|
- sbb r8, QWORD PTR [r10+192]
|
|
|
- mov rax, QWORD PTR [rcx+200]
|
|
|
- mov QWORD PTR [rcx+192], r8
|
|
|
- sbb rax, QWORD PTR [r10+200]
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rcx+200], rax
|
|
|
- sbb r8, QWORD PTR [r10+208]
|
|
|
- mov rax, QWORD PTR [rcx+216]
|
|
|
- mov QWORD PTR [rcx+208], r8
|
|
|
- sbb rax, QWORD PTR [r10+216]
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rcx+216], rax
|
|
|
- sbb r8, QWORD PTR [r10+224]
|
|
|
- mov rax, QWORD PTR [rcx+232]
|
|
|
- mov QWORD PTR [rcx+224], r8
|
|
|
- sbb rax, QWORD PTR [r10+232]
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rcx+232], rax
|
|
|
- sbb r8, QWORD PTR [r10+240]
|
|
|
- mov rax, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [rcx+240], r8
|
|
|
- sbb rax, QWORD PTR [r10+248]
|
|
|
- mov QWORD PTR [rcx+248], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- add rcx, 768
|
|
|
- ; Add in word
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- add r8, r9
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rcx+184], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+200]
|
|
|
- mov QWORD PTR [rcx+192], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rcx+200], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+216]
|
|
|
- mov QWORD PTR [rcx+208], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rcx+216], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+232]
|
|
|
- mov QWORD PTR [rcx+224], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rcx+232], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [rcx+240], r8
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rcx+248], rax
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- add rsp, 528
|
|
|
- ret
|
|
|
-sp_4096_sqr_64 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * Karatsuba: ah^2, al^2, (al - ah)^2
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_sqr_avx2_64 PROC
|
|
|
- sub rsp, 528
|
|
|
- mov QWORD PTR [rsp+512], rcx
|
|
|
- mov QWORD PTR [rsp+520], rdx
|
|
|
- mov r9, 0
|
|
|
- mov r10, rsp
|
|
|
- lea r11, QWORD PTR [rdx+256]
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- sub rax, QWORD PTR [r11]
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- sbb r8, QWORD PTR [r11+8]
|
|
|
- mov rax, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- sbb rax, QWORD PTR [r11+16]
|
|
|
- mov r8, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- sbb r8, QWORD PTR [r11+24]
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- sbb rax, QWORD PTR [r11+32]
|
|
|
- mov r8, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- sbb r8, QWORD PTR [r11+40]
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- sbb rax, QWORD PTR [r11+48]
|
|
|
- mov r8, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- sbb r8, QWORD PTR [r11+56]
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- sbb rax, QWORD PTR [r11+64]
|
|
|
- mov r8, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- sbb r8, QWORD PTR [r11+72]
|
|
|
- mov rax, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- sbb rax, QWORD PTR [r11+80]
|
|
|
- mov r8, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- sbb r8, QWORD PTR [r11+88]
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- sbb rax, QWORD PTR [r11+96]
|
|
|
- mov r8, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- sbb r8, QWORD PTR [r11+104]
|
|
|
- mov rax, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- sbb rax, QWORD PTR [r11+112]
|
|
|
- mov r8, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- sbb r8, QWORD PTR [r11+120]
|
|
|
- mov rax, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- sbb rax, QWORD PTR [r11+128]
|
|
|
- mov r8, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [r10+128], rax
|
|
|
- sbb r8, QWORD PTR [r11+136]
|
|
|
- mov rax, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [r10+136], r8
|
|
|
- sbb rax, QWORD PTR [r11+144]
|
|
|
- mov r8, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [r10+144], rax
|
|
|
- sbb r8, QWORD PTR [r11+152]
|
|
|
- mov rax, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [r10+152], r8
|
|
|
- sbb rax, QWORD PTR [r11+160]
|
|
|
- mov r8, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [r10+160], rax
|
|
|
- sbb r8, QWORD PTR [r11+168]
|
|
|
- mov rax, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [r10+168], r8
|
|
|
- sbb rax, QWORD PTR [r11+176]
|
|
|
- mov r8, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [r10+176], rax
|
|
|
- sbb r8, QWORD PTR [r11+184]
|
|
|
- mov rax, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [r10+184], r8
|
|
|
- sbb rax, QWORD PTR [r11+192]
|
|
|
- mov r8, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [r10+192], rax
|
|
|
- sbb r8, QWORD PTR [r11+200]
|
|
|
- mov rax, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [r10+200], r8
|
|
|
- sbb rax, QWORD PTR [r11+208]
|
|
|
- mov r8, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [r10+208], rax
|
|
|
- sbb r8, QWORD PTR [r11+216]
|
|
|
- mov rax, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [r10+216], r8
|
|
|
- sbb rax, QWORD PTR [r11+224]
|
|
|
- mov r8, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [r10+224], rax
|
|
|
- sbb r8, QWORD PTR [r11+232]
|
|
|
- mov rax, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [r10+232], r8
|
|
|
- sbb rax, QWORD PTR [r11+240]
|
|
|
- mov r8, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r10+240], rax
|
|
|
- sbb r8, QWORD PTR [r11+248]
|
|
|
- mov QWORD PTR [r10+248], r8
|
|
|
- sbb r9, 0
|
|
|
- ; Cond Negate
|
|
|
- mov rax, QWORD PTR [r10]
|
|
|
- mov r11, r9
|
|
|
- xor rax, r9
|
|
|
- neg r11
|
|
|
- sub rax, r9
|
|
|
- mov r8, QWORD PTR [r10+8]
|
|
|
- sbb r11, 0
|
|
|
- mov QWORD PTR [r10], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+16]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+8], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+24]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+16], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+32]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+24], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+40]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+32], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+40], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+56]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+48], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+64]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+56], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+72]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+64], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+80]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+72], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+88]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+80], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+88], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+104]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+96], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+112]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+104], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+120]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+112], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+128]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+120], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+136]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+128], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+144]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+136], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+152]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+144], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+160]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+152], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+168]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+160], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+176]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+168], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+184]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+176], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+192]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+184], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+200]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+192], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+208]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+200], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+216]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+208], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+224]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+216], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+232]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+224], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov rax, QWORD PTR [r10+240]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+232], r8
|
|
|
- xor rax, r9
|
|
|
- add rax, r11
|
|
|
- mov r8, QWORD PTR [r10+248]
|
|
|
- setc r11b
|
|
|
- mov QWORD PTR [r10+240], rax
|
|
|
- xor r8, r9
|
|
|
- add r8, r11
|
|
|
- mov QWORD PTR [r10+248], r8
|
|
|
- mov rdx, r10
|
|
|
- mov rcx, rsp
|
|
|
- call sp_2048_sqr_avx2_32
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- add rdx, 256
|
|
|
- add rcx, 512
|
|
|
- call sp_2048_sqr_avx2_32
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- call sp_2048_sqr_avx2_32
|
|
|
-IFDEF _WIN64
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
-ENDIF
|
|
|
- mov rdx, QWORD PTR [rsp+512]
|
|
|
- lea r10, QWORD PTR [rsp+256]
|
|
|
- add rdx, 768
|
|
|
- mov r9, 0
|
|
|
- mov r8, QWORD PTR [r10+-256]
|
|
|
- sub r8, QWORD PTR [rdx+-256]
|
|
|
- mov rax, QWORD PTR [r10+-248]
|
|
|
- mov QWORD PTR [r10+-256], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-248]
|
|
|
- mov r8, QWORD PTR [r10+-240]
|
|
|
- mov QWORD PTR [r10+-248], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-240]
|
|
|
- mov rax, QWORD PTR [r10+-232]
|
|
|
- mov QWORD PTR [r10+-240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-232]
|
|
|
- mov r8, QWORD PTR [r10+-224]
|
|
|
- mov QWORD PTR [r10+-232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-224]
|
|
|
- mov rax, QWORD PTR [r10+-216]
|
|
|
- mov QWORD PTR [r10+-224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-216]
|
|
|
- mov r8, QWORD PTR [r10+-208]
|
|
|
- mov QWORD PTR [r10+-216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-208]
|
|
|
- mov rax, QWORD PTR [r10+-200]
|
|
|
- mov QWORD PTR [r10+-208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-200]
|
|
|
- mov r8, QWORD PTR [r10+-192]
|
|
|
- mov QWORD PTR [r10+-200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-192]
|
|
|
- mov rax, QWORD PTR [r10+-184]
|
|
|
- mov QWORD PTR [r10+-192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-184]
|
|
|
- mov r8, QWORD PTR [r10+-176]
|
|
|
- mov QWORD PTR [r10+-184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-176]
|
|
|
- mov rax, QWORD PTR [r10+-168]
|
|
|
- mov QWORD PTR [r10+-176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-168]
|
|
|
- mov r8, QWORD PTR [r10+-160]
|
|
|
- mov QWORD PTR [r10+-168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-160]
|
|
|
- mov rax, QWORD PTR [r10+-152]
|
|
|
- mov QWORD PTR [r10+-160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-152]
|
|
|
- mov r8, QWORD PTR [r10+-144]
|
|
|
- mov QWORD PTR [r10+-152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-144]
|
|
|
- mov rax, QWORD PTR [r10+-136]
|
|
|
- mov QWORD PTR [r10+-144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-136]
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- mov QWORD PTR [r10+-136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [r10+128]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov rax, QWORD PTR [r10+136]
|
|
|
- mov QWORD PTR [r10+128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [r10+144]
|
|
|
- mov QWORD PTR [r10+136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov rax, QWORD PTR [r10+152]
|
|
|
- mov QWORD PTR [r10+144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [r10+160]
|
|
|
- mov QWORD PTR [r10+152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov QWORD PTR [r10+160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [r10+176]
|
|
|
- mov QWORD PTR [r10+168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov rax, QWORD PTR [r10+184]
|
|
|
- mov QWORD PTR [r10+176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [r10+192]
|
|
|
- mov QWORD PTR [r10+184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+192]
|
|
|
- mov rax, QWORD PTR [r10+200]
|
|
|
- mov QWORD PTR [r10+192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [r10+208]
|
|
|
- mov QWORD PTR [r10+200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+208]
|
|
|
- mov rax, QWORD PTR [r10+216]
|
|
|
- mov QWORD PTR [r10+208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [r10+224]
|
|
|
- mov QWORD PTR [r10+216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+224]
|
|
|
- mov rax, QWORD PTR [r10+232]
|
|
|
- mov QWORD PTR [r10+224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [r10+240]
|
|
|
- mov QWORD PTR [r10+232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+240]
|
|
|
- mov rax, QWORD PTR [r10+248]
|
|
|
- mov QWORD PTR [r10+240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r10+248], rax
|
|
|
- sbb r9, 0
|
|
|
- sub rdx, 512
|
|
|
- mov r8, QWORD PTR [r10+-256]
|
|
|
- sub r8, QWORD PTR [rdx+-256]
|
|
|
- mov rax, QWORD PTR [r10+-248]
|
|
|
- mov QWORD PTR [r10+-256], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-248]
|
|
|
- mov r8, QWORD PTR [r10+-240]
|
|
|
- mov QWORD PTR [r10+-248], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-240]
|
|
|
- mov rax, QWORD PTR [r10+-232]
|
|
|
- mov QWORD PTR [r10+-240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-232]
|
|
|
- mov r8, QWORD PTR [r10+-224]
|
|
|
- mov QWORD PTR [r10+-232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-224]
|
|
|
- mov rax, QWORD PTR [r10+-216]
|
|
|
- mov QWORD PTR [r10+-224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-216]
|
|
|
- mov r8, QWORD PTR [r10+-208]
|
|
|
- mov QWORD PTR [r10+-216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-208]
|
|
|
- mov rax, QWORD PTR [r10+-200]
|
|
|
- mov QWORD PTR [r10+-208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-200]
|
|
|
- mov r8, QWORD PTR [r10+-192]
|
|
|
- mov QWORD PTR [r10+-200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-192]
|
|
|
- mov rax, QWORD PTR [r10+-184]
|
|
|
- mov QWORD PTR [r10+-192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-184]
|
|
|
- mov r8, QWORD PTR [r10+-176]
|
|
|
- mov QWORD PTR [r10+-184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-176]
|
|
|
- mov rax, QWORD PTR [r10+-168]
|
|
|
- mov QWORD PTR [r10+-176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-168]
|
|
|
- mov r8, QWORD PTR [r10+-160]
|
|
|
- mov QWORD PTR [r10+-168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-160]
|
|
|
- mov rax, QWORD PTR [r10+-152]
|
|
|
- mov QWORD PTR [r10+-160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-152]
|
|
|
- mov r8, QWORD PTR [r10+-144]
|
|
|
- mov QWORD PTR [r10+-152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-144]
|
|
|
- mov rax, QWORD PTR [r10+-136]
|
|
|
- mov QWORD PTR [r10+-144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-136]
|
|
|
- mov r8, QWORD PTR [r10+-128]
|
|
|
- mov QWORD PTR [r10+-136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-128]
|
|
|
- mov rax, QWORD PTR [r10+-120]
|
|
|
- mov QWORD PTR [r10+-128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-120]
|
|
|
- mov r8, QWORD PTR [r10+-112]
|
|
|
- mov QWORD PTR [r10+-120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-112]
|
|
|
- mov rax, QWORD PTR [r10+-104]
|
|
|
- mov QWORD PTR [r10+-112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-104]
|
|
|
- mov r8, QWORD PTR [r10+-96]
|
|
|
- mov QWORD PTR [r10+-104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-96]
|
|
|
- mov rax, QWORD PTR [r10+-88]
|
|
|
- mov QWORD PTR [r10+-96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-88]
|
|
|
- mov r8, QWORD PTR [r10+-80]
|
|
|
- mov QWORD PTR [r10+-88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-80]
|
|
|
- mov rax, QWORD PTR [r10+-72]
|
|
|
- mov QWORD PTR [r10+-80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-72]
|
|
|
- mov r8, QWORD PTR [r10+-64]
|
|
|
- mov QWORD PTR [r10+-72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-64]
|
|
|
- mov rax, QWORD PTR [r10+-56]
|
|
|
- mov QWORD PTR [r10+-64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-56]
|
|
|
- mov r8, QWORD PTR [r10+-48]
|
|
|
- mov QWORD PTR [r10+-56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-48]
|
|
|
- mov rax, QWORD PTR [r10+-40]
|
|
|
- mov QWORD PTR [r10+-48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-40]
|
|
|
- mov r8, QWORD PTR [r10+-32]
|
|
|
- mov QWORD PTR [r10+-40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-32]
|
|
|
- mov rax, QWORD PTR [r10+-24]
|
|
|
- mov QWORD PTR [r10+-32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-24]
|
|
|
- mov r8, QWORD PTR [r10+-16]
|
|
|
- mov QWORD PTR [r10+-24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+-16]
|
|
|
- mov rax, QWORD PTR [r10+-8]
|
|
|
- mov QWORD PTR [r10+-16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+-8]
|
|
|
- mov r8, QWORD PTR [r10]
|
|
|
- mov QWORD PTR [r10+-8], rax
|
|
|
- sbb r8, QWORD PTR [rdx]
|
|
|
- mov rax, QWORD PTR [r10+8]
|
|
|
- mov QWORD PTR [r10], r8
|
|
|
- sbb rax, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [r10+16]
|
|
|
- mov QWORD PTR [r10+8], rax
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov QWORD PTR [r10+16], r8
|
|
|
- sbb rax, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [r10+32]
|
|
|
- mov QWORD PTR [r10+24], rax
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov rax, QWORD PTR [r10+40]
|
|
|
- mov QWORD PTR [r10+32], r8
|
|
|
- sbb rax, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [r10+48]
|
|
|
- mov QWORD PTR [r10+40], rax
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov rax, QWORD PTR [r10+56]
|
|
|
- mov QWORD PTR [r10+48], r8
|
|
|
- sbb rax, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [r10+64]
|
|
|
- mov QWORD PTR [r10+56], rax
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov QWORD PTR [r10+64], r8
|
|
|
- sbb rax, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [r10+80]
|
|
|
- mov QWORD PTR [r10+72], rax
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov rax, QWORD PTR [r10+88]
|
|
|
- mov QWORD PTR [r10+80], r8
|
|
|
- sbb rax, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [r10+96]
|
|
|
- mov QWORD PTR [r10+88], rax
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov rax, QWORD PTR [r10+104]
|
|
|
- mov QWORD PTR [r10+96], r8
|
|
|
- sbb rax, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [r10+112]
|
|
|
- mov QWORD PTR [r10+104], rax
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov QWORD PTR [r10+112], r8
|
|
|
- sbb rax, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [r10+128]
|
|
|
- mov QWORD PTR [r10+120], rax
|
|
|
- sbb r8, QWORD PTR [rdx+128]
|
|
|
- mov rax, QWORD PTR [r10+136]
|
|
|
- mov QWORD PTR [r10+128], r8
|
|
|
- sbb rax, QWORD PTR [rdx+136]
|
|
|
- mov r8, QWORD PTR [r10+144]
|
|
|
- mov QWORD PTR [r10+136], rax
|
|
|
- sbb r8, QWORD PTR [rdx+144]
|
|
|
- mov rax, QWORD PTR [r10+152]
|
|
|
- mov QWORD PTR [r10+144], r8
|
|
|
- sbb rax, QWORD PTR [rdx+152]
|
|
|
- mov r8, QWORD PTR [r10+160]
|
|
|
- mov QWORD PTR [r10+152], rax
|
|
|
- sbb r8, QWORD PTR [rdx+160]
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov QWORD PTR [r10+160], r8
|
|
|
- sbb rax, QWORD PTR [rdx+168]
|
|
|
- mov r8, QWORD PTR [r10+176]
|
|
|
- mov QWORD PTR [r10+168], rax
|
|
|
- sbb r8, QWORD PTR [rdx+176]
|
|
|
- mov rax, QWORD PTR [r10+184]
|
|
|
- mov QWORD PTR [r10+176], r8
|
|
|
- sbb rax, QWORD PTR [rdx+184]
|
|
|
- mov r8, QWORD PTR [r10+192]
|
|
|
- mov QWORD PTR [r10+184], rax
|
|
|
- sbb r8, QWORD PTR [rdx+192]
|
|
|
- mov rax, QWORD PTR [r10+200]
|
|
|
- mov QWORD PTR [r10+192], r8
|
|
|
- sbb rax, QWORD PTR [rdx+200]
|
|
|
- mov r8, QWORD PTR [r10+208]
|
|
|
- mov QWORD PTR [r10+200], rax
|
|
|
- sbb r8, QWORD PTR [rdx+208]
|
|
|
- mov rax, QWORD PTR [r10+216]
|
|
|
- mov QWORD PTR [r10+208], r8
|
|
|
- sbb rax, QWORD PTR [rdx+216]
|
|
|
- mov r8, QWORD PTR [r10+224]
|
|
|
- mov QWORD PTR [r10+216], rax
|
|
|
- sbb r8, QWORD PTR [rdx+224]
|
|
|
- mov rax, QWORD PTR [r10+232]
|
|
|
- mov QWORD PTR [r10+224], r8
|
|
|
- sbb rax, QWORD PTR [rdx+232]
|
|
|
- mov r8, QWORD PTR [r10+240]
|
|
|
- mov QWORD PTR [r10+232], rax
|
|
|
- sbb r8, QWORD PTR [rdx+240]
|
|
|
- mov rax, QWORD PTR [r10+248]
|
|
|
- mov QWORD PTR [r10+240], r8
|
|
|
- sbb rax, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [r10+248], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- neg r9
|
|
|
- add rcx, 512
|
|
|
- mov r8, QWORD PTR [rcx+-256]
|
|
|
- sub r8, QWORD PTR [r10+-256]
|
|
|
- mov rax, QWORD PTR [rcx+-248]
|
|
|
- mov QWORD PTR [rcx+-256], r8
|
|
|
- sbb rax, QWORD PTR [r10+-248]
|
|
|
- mov r8, QWORD PTR [rcx+-240]
|
|
|
- mov QWORD PTR [rcx+-248], rax
|
|
|
- sbb r8, QWORD PTR [r10+-240]
|
|
|
- mov rax, QWORD PTR [rcx+-232]
|
|
|
- mov QWORD PTR [rcx+-240], r8
|
|
|
- sbb rax, QWORD PTR [r10+-232]
|
|
|
- mov r8, QWORD PTR [rcx+-224]
|
|
|
- mov QWORD PTR [rcx+-232], rax
|
|
|
- sbb r8, QWORD PTR [r10+-224]
|
|
|
- mov rax, QWORD PTR [rcx+-216]
|
|
|
- mov QWORD PTR [rcx+-224], r8
|
|
|
- sbb rax, QWORD PTR [r10+-216]
|
|
|
- mov r8, QWORD PTR [rcx+-208]
|
|
|
- mov QWORD PTR [rcx+-216], rax
|
|
|
- sbb r8, QWORD PTR [r10+-208]
|
|
|
- mov rax, QWORD PTR [rcx+-200]
|
|
|
- mov QWORD PTR [rcx+-208], r8
|
|
|
- sbb rax, QWORD PTR [r10+-200]
|
|
|
- mov r8, QWORD PTR [rcx+-192]
|
|
|
- mov QWORD PTR [rcx+-200], rax
|
|
|
- sbb r8, QWORD PTR [r10+-192]
|
|
|
- mov rax, QWORD PTR [rcx+-184]
|
|
|
- mov QWORD PTR [rcx+-192], r8
|
|
|
- sbb rax, QWORD PTR [r10+-184]
|
|
|
- mov r8, QWORD PTR [rcx+-176]
|
|
|
- mov QWORD PTR [rcx+-184], rax
|
|
|
- sbb r8, QWORD PTR [r10+-176]
|
|
|
- mov rax, QWORD PTR [rcx+-168]
|
|
|
- mov QWORD PTR [rcx+-176], r8
|
|
|
- sbb rax, QWORD PTR [r10+-168]
|
|
|
- mov r8, QWORD PTR [rcx+-160]
|
|
|
- mov QWORD PTR [rcx+-168], rax
|
|
|
- sbb r8, QWORD PTR [r10+-160]
|
|
|
- mov rax, QWORD PTR [rcx+-152]
|
|
|
- mov QWORD PTR [rcx+-160], r8
|
|
|
- sbb rax, QWORD PTR [r10+-152]
|
|
|
- mov r8, QWORD PTR [rcx+-144]
|
|
|
- mov QWORD PTR [rcx+-152], rax
|
|
|
- sbb r8, QWORD PTR [r10+-144]
|
|
|
- mov rax, QWORD PTR [rcx+-136]
|
|
|
- mov QWORD PTR [rcx+-144], r8
|
|
|
- sbb rax, QWORD PTR [r10+-136]
|
|
|
- mov r8, QWORD PTR [rcx+-128]
|
|
|
- mov QWORD PTR [rcx+-136], rax
|
|
|
- sbb r8, QWORD PTR [r10+-128]
|
|
|
- mov rax, QWORD PTR [rcx+-120]
|
|
|
- mov QWORD PTR [rcx+-128], r8
|
|
|
- sbb rax, QWORD PTR [r10+-120]
|
|
|
- mov r8, QWORD PTR [rcx+-112]
|
|
|
- mov QWORD PTR [rcx+-120], rax
|
|
|
- sbb r8, QWORD PTR [r10+-112]
|
|
|
- mov rax, QWORD PTR [rcx+-104]
|
|
|
- mov QWORD PTR [rcx+-112], r8
|
|
|
- sbb rax, QWORD PTR [r10+-104]
|
|
|
- mov r8, QWORD PTR [rcx+-96]
|
|
|
- mov QWORD PTR [rcx+-104], rax
|
|
|
- sbb r8, QWORD PTR [r10+-96]
|
|
|
- mov rax, QWORD PTR [rcx+-88]
|
|
|
- mov QWORD PTR [rcx+-96], r8
|
|
|
- sbb rax, QWORD PTR [r10+-88]
|
|
|
- mov r8, QWORD PTR [rcx+-80]
|
|
|
- mov QWORD PTR [rcx+-88], rax
|
|
|
- sbb r8, QWORD PTR [r10+-80]
|
|
|
- mov rax, QWORD PTR [rcx+-72]
|
|
|
- mov QWORD PTR [rcx+-80], r8
|
|
|
- sbb rax, QWORD PTR [r10+-72]
|
|
|
- mov r8, QWORD PTR [rcx+-64]
|
|
|
- mov QWORD PTR [rcx+-72], rax
|
|
|
- sbb r8, QWORD PTR [r10+-64]
|
|
|
- mov rax, QWORD PTR [rcx+-56]
|
|
|
- mov QWORD PTR [rcx+-64], r8
|
|
|
- sbb rax, QWORD PTR [r10+-56]
|
|
|
- mov r8, QWORD PTR [rcx+-48]
|
|
|
- mov QWORD PTR [rcx+-56], rax
|
|
|
- sbb r8, QWORD PTR [r10+-48]
|
|
|
- mov rax, QWORD PTR [rcx+-40]
|
|
|
- mov QWORD PTR [rcx+-48], r8
|
|
|
- sbb rax, QWORD PTR [r10+-40]
|
|
|
- mov r8, QWORD PTR [rcx+-32]
|
|
|
- mov QWORD PTR [rcx+-40], rax
|
|
|
- sbb r8, QWORD PTR [r10+-32]
|
|
|
- mov rax, QWORD PTR [rcx+-24]
|
|
|
- mov QWORD PTR [rcx+-32], r8
|
|
|
- sbb rax, QWORD PTR [r10+-24]
|
|
|
- mov r8, QWORD PTR [rcx+-16]
|
|
|
- mov QWORD PTR [rcx+-24], rax
|
|
|
- sbb r8, QWORD PTR [r10+-16]
|
|
|
- mov rax, QWORD PTR [rcx+-8]
|
|
|
- mov QWORD PTR [rcx+-16], r8
|
|
|
- sbb rax, QWORD PTR [r10+-8]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- mov QWORD PTR [rcx+-8], rax
|
|
|
- sbb r8, QWORD PTR [r10]
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- sbb rax, QWORD PTR [r10+8]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- sbb r8, QWORD PTR [r10+16]
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb rax, QWORD PTR [r10+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- sbb r8, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb rax, QWORD PTR [r10+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- sbb r8, QWORD PTR [r10+48]
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb rax, QWORD PTR [r10+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- sbb r8, QWORD PTR [r10+64]
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb rax, QWORD PTR [r10+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- sbb r8, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb rax, QWORD PTR [r10+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- sbb r8, QWORD PTR [r10+96]
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb rax, QWORD PTR [r10+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- sbb r8, QWORD PTR [r10+112]
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb rax, QWORD PTR [r10+120]
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- sbb r8, QWORD PTR [r10+128]
|
|
|
- mov rax, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- sbb rax, QWORD PTR [r10+136]
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], rax
|
|
|
- sbb r8, QWORD PTR [r10+144]
|
|
|
- mov rax, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- sbb rax, QWORD PTR [r10+152]
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], rax
|
|
|
- sbb r8, QWORD PTR [r10+160]
|
|
|
- mov rax, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- sbb rax, QWORD PTR [r10+168]
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], rax
|
|
|
- sbb r8, QWORD PTR [r10+176]
|
|
|
- mov rax, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- sbb rax, QWORD PTR [r10+184]
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rcx+184], rax
|
|
|
- sbb r8, QWORD PTR [r10+192]
|
|
|
- mov rax, QWORD PTR [rcx+200]
|
|
|
- mov QWORD PTR [rcx+192], r8
|
|
|
- sbb rax, QWORD PTR [r10+200]
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rcx+200], rax
|
|
|
- sbb r8, QWORD PTR [r10+208]
|
|
|
- mov rax, QWORD PTR [rcx+216]
|
|
|
- mov QWORD PTR [rcx+208], r8
|
|
|
- sbb rax, QWORD PTR [r10+216]
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rcx+216], rax
|
|
|
- sbb r8, QWORD PTR [r10+224]
|
|
|
- mov rax, QWORD PTR [rcx+232]
|
|
|
- mov QWORD PTR [rcx+224], r8
|
|
|
- sbb rax, QWORD PTR [r10+232]
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rcx+232], rax
|
|
|
- sbb r8, QWORD PTR [r10+240]
|
|
|
- mov rax, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [rcx+240], r8
|
|
|
- sbb rax, QWORD PTR [r10+248]
|
|
|
- mov QWORD PTR [rcx+248], rax
|
|
|
- sbb r9, 0
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- add rcx, 768
|
|
|
- ; Add in word
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- add r8, r9
|
|
|
- mov rax, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+128]
|
|
|
- mov QWORD PTR [rcx+120], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+136]
|
|
|
- mov QWORD PTR [rcx+128], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+144]
|
|
|
- mov QWORD PTR [rcx+136], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+152]
|
|
|
- mov QWORD PTR [rcx+144], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+160]
|
|
|
- mov QWORD PTR [rcx+152], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+168]
|
|
|
- mov QWORD PTR [rcx+160], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+176]
|
|
|
- mov QWORD PTR [rcx+168], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+184]
|
|
|
- mov QWORD PTR [rcx+176], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+192]
|
|
|
- mov QWORD PTR [rcx+184], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+200]
|
|
|
- mov QWORD PTR [rcx+192], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+208]
|
|
|
- mov QWORD PTR [rcx+200], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+216]
|
|
|
- mov QWORD PTR [rcx+208], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+224]
|
|
|
- mov QWORD PTR [rcx+216], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+232]
|
|
|
- mov QWORD PTR [rcx+224], r8
|
|
|
- adc rax, 0
|
|
|
- mov r8, QWORD PTR [rcx+240]
|
|
|
- mov QWORD PTR [rcx+232], rax
|
|
|
- adc r8, 0
|
|
|
- mov rax, QWORD PTR [rcx+248]
|
|
|
- mov QWORD PTR [rcx+240], r8
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [rcx+248], rax
|
|
|
- mov rdx, QWORD PTR [rsp+520]
|
|
|
- mov rcx, QWORD PTR [rsp+512]
|
|
|
- add rsp, 528
|
|
|
- ret
|
|
|
-sp_4096_sqr_avx2_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Multiply a by digit b and store the result in r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r  A single precision integer that receives the product.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision digit.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_mul_d_64 PROC
|
|
|
- push r12
|
|
|
- mov r9, rdx
|
|
|
- ; A[0] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- mov r10, rax
|
|
|
- mov r11, rdx
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- ; A[1] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[16] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+128]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[17] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+136]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[18] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+144]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[19] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+152]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[20] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+160]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[21] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+168]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[22] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+176]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[23] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+184]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[24] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+192]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[25] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+200]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[26] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+208]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[27] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+216]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[28] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+224]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[29] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+232]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[30] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+240]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[31] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+248]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[32] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+256]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+256], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[33] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+264]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+264], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[34] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+272]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+272], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[35] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+280]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+280], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[36] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+288]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+288], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[37] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+296]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+296], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[38] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+304]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+304], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[39] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+312]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+312], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[40] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+320]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+320], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[41] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+328]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+328], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[42] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+336]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+336], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[43] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+344]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+344], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[44] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+352]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+352], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[45] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+360]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+360], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[46] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+368]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+368], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[47] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+376]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+376], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[48] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+384]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+384], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[49] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+392]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+392], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[50] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+400]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+400], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[51] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+408]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+408], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[52] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+416]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+416], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[53] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+424]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+424], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[54] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+432]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+432], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[55] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+440]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+440], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[56] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+448]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+448], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[57] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+456]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+456], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[58] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+464]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+464], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[59] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+472]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+472], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[60] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+480]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+480], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[61] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+488]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+488], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[62] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+496]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+496], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[63] * B
|
|
|
- mov rax, r8
|
|
|
- mul QWORD PTR [r9+504]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- mov QWORD PTR [rcx+504], r10
|
|
|
- mov QWORD PTR [rcx+512], r11
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_mul_d_64 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Conditionally subtract b from a using the mask m.
-; *
-; * Computes r = a - (b AND m) over 64 words of 64 bits each.
-; * m is -1 (all ones) to subtract b, and 0 to subtract nothing (r = a).
-; * The routine is straight-line code: it contains no branches.
-; *
-; * r  (rcx)  Destination for the conditional subtraction result.
-; * a  (rdx)  A single precision number to subtract from.
-; * b  (r8)   A single precision number to subtract.
-; * m  (r9)   Mask value ANDed with every word of b.
-; *
-; * Returns in rax: 0 when there is no borrow out of the top word,
-; * -1 when there is.
-; * Overwrites r8, r10 and r11, and uses 512 bytes of stack to hold the
-; * masked copy of b.
-; */
-_text SEGMENT READONLY PARA
-sp_4096_cond_sub_64 PROC
-        ; Reserve a 64-word scratch area for (b AND m).
-        sub     rsp, 512
-        ; Pass 1: mask b two words at a time into the scratch area.
-        ; This is done ahead of the subtraction because AND rewrites the
-        ; carry flag and so cannot sit inside the sbb chain below.
-L_4096_cond_sub_64_off = 0
-REPT 32
-        mov     r10, QWORD PTR [r8+L_4096_cond_sub_64_off]
-        mov     r11, QWORD PTR [r8+L_4096_cond_sub_64_off+8]
-        and     r10, r9
-        and     r11, r9
-        mov     QWORD PTR [rsp+L_4096_cond_sub_64_off], r10
-        mov     QWORD PTR [rsp+L_4096_cond_sub_64_off+8], r11
-L_4096_cond_sub_64_off = L_4096_cond_sub_64_off + 16
-ENDM
-        ; Pass 2: borrow chain r = a - scratch.
-        ; Word 0 starts the chain with a plain sub.
-        mov     r10, QWORD PTR [rdx]
-        mov     r8, QWORD PTR [rsp]
-        sub     r10, r8
-        ; Words 1..62: r10 and r11 alternate as the working word, and each
-        ; result is stored only after the next word has been subtracted.
-        ; Only mov sits between the sbb instructions, so the borrow in the
-        ; carry flag is carried from word to word.
-L_4096_cond_sub_64_off = 8
-REPT 31
-        mov     r11, QWORD PTR [rdx+L_4096_cond_sub_64_off]
-        mov     r8, QWORD PTR [rsp+L_4096_cond_sub_64_off]
-        sbb     r11, r8
-        mov     QWORD PTR [rcx+L_4096_cond_sub_64_off-8], r10
-        mov     r10, QWORD PTR [rdx+L_4096_cond_sub_64_off+8]
-        mov     r8, QWORD PTR [rsp+L_4096_cond_sub_64_off+8]
-        sbb     r10, r8
-        mov     QWORD PTR [rcx+L_4096_cond_sub_64_off], r11
-L_4096_cond_sub_64_off = L_4096_cond_sub_64_off + 16
-ENDM
-        ; Word 63 closes the chain; then flush the last two results.
-        mov     r11, QWORD PTR [rdx+504]
-        mov     r8, QWORD PTR [rsp+504]
-        sbb     r11, r8
-        mov     QWORD PTR [rcx+496], r10
-        mov     QWORD PTR [rcx+504], r11
-        ; rax = 0 - borrow: 0 if no borrow, -1 if the subtraction borrowed.
-        sbb     rax, rax
-        add     rsp, 512
-        ret
-sp_4096_cond_sub_64 ENDP
-_text ENDS
|
|
|
-; /* Reduce the number back to 4096 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_mont_reduce_64 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov r9, rdx
|
|
|
- xor rsi, rsi
|
|
|
- ; i = 64
|
|
|
- mov r10, 64
|
|
|
- mov r15, QWORD PTR [rcx]
|
|
|
- mov rdi, QWORD PTR [rcx+8]
|
|
|
-L_4096_mont_reduce_64_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov r13, r15
|
|
|
- imul r13, r8
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- add r15, rax
|
|
|
- adc r12, rdx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- mov r15, rdi
|
|
|
- add r15, rax
|
|
|
- adc r11, rdx
|
|
|
- add r15, r12
|
|
|
- adc r11, 0
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- mov rdi, QWORD PTR [rcx+16]
|
|
|
- add rdi, rax
|
|
|
- adc r12, rdx
|
|
|
- add rdi, r11
|
|
|
- adc r12, 0
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- mov r14, QWORD PTR [rcx+24]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+24], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- mov r14, QWORD PTR [rcx+32]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+32], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- mov r14, QWORD PTR [rcx+40]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+40], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- mov r14, QWORD PTR [rcx+48]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- mov r14, QWORD PTR [rcx+56]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- mov r14, QWORD PTR [rcx+64]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+64], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- mov r14, QWORD PTR [rcx+72]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+72], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- mov r14, QWORD PTR [rcx+80]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+80], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- mov r14, QWORD PTR [rcx+88]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+88], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- mov r14, QWORD PTR [rcx+96]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+96], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- mov r14, QWORD PTR [rcx+104]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+104], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- mov r14, QWORD PTR [rcx+112]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+112], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- mov r14, QWORD PTR [rcx+120]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+120], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+16] += m[16] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+128]
|
|
|
- mov r14, QWORD PTR [rcx+128]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+128], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+17] += m[17] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+136]
|
|
|
- mov r14, QWORD PTR [rcx+136]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+136], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+18] += m[18] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+144]
|
|
|
- mov r14, QWORD PTR [rcx+144]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+144], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+19] += m[19] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+152]
|
|
|
- mov r14, QWORD PTR [rcx+152]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+152], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+20] += m[20] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+160]
|
|
|
- mov r14, QWORD PTR [rcx+160]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+160], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+21] += m[21] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+168]
|
|
|
- mov r14, QWORD PTR [rcx+168]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+168], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+22] += m[22] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+176]
|
|
|
- mov r14, QWORD PTR [rcx+176]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+176], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+23] += m[23] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+184]
|
|
|
- mov r14, QWORD PTR [rcx+184]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+184], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+24] += m[24] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+192]
|
|
|
- mov r14, QWORD PTR [rcx+192]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+192], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+25] += m[25] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+200]
|
|
|
- mov r14, QWORD PTR [rcx+200]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+200], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+26] += m[26] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+208]
|
|
|
- mov r14, QWORD PTR [rcx+208]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+208], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+27] += m[27] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+216]
|
|
|
- mov r14, QWORD PTR [rcx+216]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+216], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+28] += m[28] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+224]
|
|
|
- mov r14, QWORD PTR [rcx+224]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+224], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+29] += m[29] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+232]
|
|
|
- mov r14, QWORD PTR [rcx+232]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+232], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+30] += m[30] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+240]
|
|
|
- mov r14, QWORD PTR [rcx+240]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+240], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+31] += m[31] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+248]
|
|
|
- mov r14, QWORD PTR [rcx+248]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+248], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+32] += m[32] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+256]
|
|
|
- mov r14, QWORD PTR [rcx+256]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+256], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+33] += m[33] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+264]
|
|
|
- mov r14, QWORD PTR [rcx+264]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+264], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+34] += m[34] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+272]
|
|
|
- mov r14, QWORD PTR [rcx+272]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+272], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+35] += m[35] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+280]
|
|
|
- mov r14, QWORD PTR [rcx+280]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+280], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+36] += m[36] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+288]
|
|
|
- mov r14, QWORD PTR [rcx+288]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+288], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+37] += m[37] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+296]
|
|
|
- mov r14, QWORD PTR [rcx+296]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+296], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+38] += m[38] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+304]
|
|
|
- mov r14, QWORD PTR [rcx+304]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+304], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+39] += m[39] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+312]
|
|
|
- mov r14, QWORD PTR [rcx+312]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+312], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+40] += m[40] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+320]
|
|
|
- mov r14, QWORD PTR [rcx+320]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+320], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+41] += m[41] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+328]
|
|
|
- mov r14, QWORD PTR [rcx+328]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+328], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+42] += m[42] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+336]
|
|
|
- mov r14, QWORD PTR [rcx+336]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+336], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+43] += m[43] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+344]
|
|
|
- mov r14, QWORD PTR [rcx+344]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+344], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+44] += m[44] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+352]
|
|
|
- mov r14, QWORD PTR [rcx+352]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+352], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+45] += m[45] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+360]
|
|
|
- mov r14, QWORD PTR [rcx+360]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+360], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+46] += m[46] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+368]
|
|
|
- mov r14, QWORD PTR [rcx+368]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+368], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+47] += m[47] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+376]
|
|
|
- mov r14, QWORD PTR [rcx+376]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+376], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+48] += m[48] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+384]
|
|
|
- mov r14, QWORD PTR [rcx+384]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+384], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+49] += m[49] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+392]
|
|
|
- mov r14, QWORD PTR [rcx+392]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+392], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+50] += m[50] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+400]
|
|
|
- mov r14, QWORD PTR [rcx+400]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+400], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+51] += m[51] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+408]
|
|
|
- mov r14, QWORD PTR [rcx+408]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+408], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+52] += m[52] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+416]
|
|
|
- mov r14, QWORD PTR [rcx+416]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+416], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+53] += m[53] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+424]
|
|
|
- mov r14, QWORD PTR [rcx+424]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+424], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+54] += m[54] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+432]
|
|
|
- mov r14, QWORD PTR [rcx+432]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+432], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+55] += m[55] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+440]
|
|
|
- mov r14, QWORD PTR [rcx+440]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+440], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+56] += m[56] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+448]
|
|
|
- mov r14, QWORD PTR [rcx+448]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+448], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+57] += m[57] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+456]
|
|
|
- mov r14, QWORD PTR [rcx+456]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+456], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+58] += m[58] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+464]
|
|
|
- mov r14, QWORD PTR [rcx+464]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+464], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+59] += m[59] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+472]
|
|
|
- mov r14, QWORD PTR [rcx+472]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+472], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+60] += m[60] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+480]
|
|
|
- mov r14, QWORD PTR [rcx+480]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+480], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+61] += m[61] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+488]
|
|
|
- mov r14, QWORD PTR [rcx+488]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+488], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+62] += m[62] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+496]
|
|
|
- mov r14, QWORD PTR [rcx+496]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+496], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+63] += m[63] * mu
|
|
|
- mov rax, r13
|
|
|
- mul QWORD PTR [r9+504]
|
|
|
- mov r14, QWORD PTR [rcx+504]
|
|
|
- add r12, rax
|
|
|
- adc rdx, rsi
|
|
|
- mov rsi, 0
|
|
|
- adc rsi, 0
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+504], r14
|
|
|
- adc QWORD PTR [rcx+512], rdx
|
|
|
- adc rsi, 0
|
|
|
- ; i -= 1
|
|
|
- add rcx, 8
|
|
|
- dec r10
|
|
|
- jnz L_4096_mont_reduce_64_loop
|
|
|
- mov QWORD PTR [rcx], r15
|
|
|
- mov QWORD PTR [rcx+8], rdi
|
|
|
- neg rsi
|
|
|
-IFDEF _WIN64
|
|
|
- mov r8, r9
|
|
|
- mov r9, rsi
|
|
|
-ELSE
|
|
|
- mov r9, rsi
|
|
|
- mov r8, r9
|
|
|
-ENDIF
|
|
|
- mov rdx, rcx
|
|
|
- mov rcx, rcx
|
|
|
- sub rcx, 512
|
|
|
- call sp_4096_cond_sub_64
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_mont_reduce_64 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into r over 64 64-bit words, propagating the borrow. (r = a - b)
|
|
|
-; *
|
|
|
-; * r A single precision integer. Receives the 64 result words; addressed through rcx.
|
|
|
-; * a A single precision integer. The minuend; 64 words read through rdx.
|
|
|
-; * b A single precision integer. The subtrahend; 64 words read through r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_sub_64 PROC
|
|
|
- mov r9, QWORD PTR [rdx] ; r9 = a[0]
|
|
|
- sub r9, QWORD PTR [r8] ; a[0] - b[0]; plain sub starts the borrow chain in CF
|
|
|
- mov r10, QWORD PTR [rdx+8] ; loads and stores are mov, which leaves CF intact between sub/sbb
|
|
|
- mov QWORD PTR [rcx], r9 ; r[0]
|
|
|
- sbb r10, QWORD PTR [r8+8] ; a[1] - b[1] - borrow; r9 and r10 alternate as the working register
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- sbb r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- sbb r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- sbb r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- sbb r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- sbb r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- sbb r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- sbb r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- sbb r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- sbb r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- sbb r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- sbb r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- sbb r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- sbb r10, QWORD PTR [r8+120]
|
|
|
- mov r9, QWORD PTR [rdx+128]
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- sbb r9, QWORD PTR [r8+128]
|
|
|
- mov r10, QWORD PTR [rdx+136]
|
|
|
- mov QWORD PTR [rcx+128], r9
|
|
|
- sbb r10, QWORD PTR [r8+136]
|
|
|
- mov r9, QWORD PTR [rdx+144]
|
|
|
- mov QWORD PTR [rcx+136], r10
|
|
|
- sbb r9, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+152]
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- sbb r10, QWORD PTR [r8+152]
|
|
|
- mov r9, QWORD PTR [rdx+160]
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- sbb r9, QWORD PTR [r8+160]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- mov QWORD PTR [rcx+160], r9
|
|
|
- sbb r10, QWORD PTR [r8+168]
|
|
|
- mov r9, QWORD PTR [rdx+176]
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- sbb r9, QWORD PTR [r8+176]
|
|
|
- mov r10, QWORD PTR [rdx+184]
|
|
|
- mov QWORD PTR [rcx+176], r9
|
|
|
- sbb r10, QWORD PTR [r8+184]
|
|
|
- mov r9, QWORD PTR [rdx+192]
|
|
|
- mov QWORD PTR [rcx+184], r10
|
|
|
- sbb r9, QWORD PTR [r8+192]
|
|
|
- mov r10, QWORD PTR [rdx+200]
|
|
|
- mov QWORD PTR [rcx+192], r9
|
|
|
- sbb r10, QWORD PTR [r8+200]
|
|
|
- mov r9, QWORD PTR [rdx+208]
|
|
|
- mov QWORD PTR [rcx+200], r10
|
|
|
- sbb r9, QWORD PTR [r8+208]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- mov QWORD PTR [rcx+208], r9
|
|
|
- sbb r10, QWORD PTR [r8+216]
|
|
|
- mov r9, QWORD PTR [rdx+224]
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- sbb r9, QWORD PTR [r8+224]
|
|
|
- mov r10, QWORD PTR [rdx+232]
|
|
|
- mov QWORD PTR [rcx+224], r9
|
|
|
- sbb r10, QWORD PTR [r8+232]
|
|
|
- mov r9, QWORD PTR [rdx+240]
|
|
|
- mov QWORD PTR [rcx+232], r10
|
|
|
- sbb r9, QWORD PTR [r8+240]
|
|
|
- mov r10, QWORD PTR [rdx+248]
|
|
|
- mov QWORD PTR [rcx+240], r9
|
|
|
- sbb r10, QWORD PTR [r8+248]
|
|
|
- mov r9, QWORD PTR [rdx+256]
|
|
|
- mov QWORD PTR [rcx+248], r10
|
|
|
- sbb r9, QWORD PTR [r8+256]
|
|
|
- mov r10, QWORD PTR [rdx+264]
|
|
|
- mov QWORD PTR [rcx+256], r9
|
|
|
- sbb r10, QWORD PTR [r8+264]
|
|
|
- mov r9, QWORD PTR [rdx+272]
|
|
|
- mov QWORD PTR [rcx+264], r10
|
|
|
- sbb r9, QWORD PTR [r8+272]
|
|
|
- mov r10, QWORD PTR [rdx+280]
|
|
|
- mov QWORD PTR [rcx+272], r9
|
|
|
- sbb r10, QWORD PTR [r8+280]
|
|
|
- mov r9, QWORD PTR [rdx+288]
|
|
|
- mov QWORD PTR [rcx+280], r10
|
|
|
- sbb r9, QWORD PTR [r8+288]
|
|
|
- mov r10, QWORD PTR [rdx+296]
|
|
|
- mov QWORD PTR [rcx+288], r9
|
|
|
- sbb r10, QWORD PTR [r8+296]
|
|
|
- mov r9, QWORD PTR [rdx+304]
|
|
|
- mov QWORD PTR [rcx+296], r10
|
|
|
- sbb r9, QWORD PTR [r8+304]
|
|
|
- mov r10, QWORD PTR [rdx+312]
|
|
|
- mov QWORD PTR [rcx+304], r9
|
|
|
- sbb r10, QWORD PTR [r8+312]
|
|
|
- mov r9, QWORD PTR [rdx+320]
|
|
|
- mov QWORD PTR [rcx+312], r10
|
|
|
- sbb r9, QWORD PTR [r8+320]
|
|
|
- mov r10, QWORD PTR [rdx+328]
|
|
|
- mov QWORD PTR [rcx+320], r9
|
|
|
- sbb r10, QWORD PTR [r8+328]
|
|
|
- mov r9, QWORD PTR [rdx+336]
|
|
|
- mov QWORD PTR [rcx+328], r10
|
|
|
- sbb r9, QWORD PTR [r8+336]
|
|
|
- mov r10, QWORD PTR [rdx+344]
|
|
|
- mov QWORD PTR [rcx+336], r9
|
|
|
- sbb r10, QWORD PTR [r8+344]
|
|
|
- mov r9, QWORD PTR [rdx+352]
|
|
|
- mov QWORD PTR [rcx+344], r10
|
|
|
- sbb r9, QWORD PTR [r8+352]
|
|
|
- mov r10, QWORD PTR [rdx+360]
|
|
|
- mov QWORD PTR [rcx+352], r9
|
|
|
- sbb r10, QWORD PTR [r8+360]
|
|
|
- mov r9, QWORD PTR [rdx+368]
|
|
|
- mov QWORD PTR [rcx+360], r10
|
|
|
- sbb r9, QWORD PTR [r8+368]
|
|
|
- mov r10, QWORD PTR [rdx+376]
|
|
|
- mov QWORD PTR [rcx+368], r9
|
|
|
- sbb r10, QWORD PTR [r8+376]
|
|
|
- mov r9, QWORD PTR [rdx+384]
|
|
|
- mov QWORD PTR [rcx+376], r10
|
|
|
- sbb r9, QWORD PTR [r8+384]
|
|
|
- mov r10, QWORD PTR [rdx+392]
|
|
|
- mov QWORD PTR [rcx+384], r9
|
|
|
- sbb r10, QWORD PTR [r8+392]
|
|
|
- mov r9, QWORD PTR [rdx+400]
|
|
|
- mov QWORD PTR [rcx+392], r10
|
|
|
- sbb r9, QWORD PTR [r8+400]
|
|
|
- mov r10, QWORD PTR [rdx+408]
|
|
|
- mov QWORD PTR [rcx+400], r9
|
|
|
- sbb r10, QWORD PTR [r8+408]
|
|
|
- mov r9, QWORD PTR [rdx+416]
|
|
|
- mov QWORD PTR [rcx+408], r10
|
|
|
- sbb r9, QWORD PTR [r8+416]
|
|
|
- mov r10, QWORD PTR [rdx+424]
|
|
|
- mov QWORD PTR [rcx+416], r9
|
|
|
- sbb r10, QWORD PTR [r8+424]
|
|
|
- mov r9, QWORD PTR [rdx+432]
|
|
|
- mov QWORD PTR [rcx+424], r10
|
|
|
- sbb r9, QWORD PTR [r8+432]
|
|
|
- mov r10, QWORD PTR [rdx+440]
|
|
|
- mov QWORD PTR [rcx+432], r9
|
|
|
- sbb r10, QWORD PTR [r8+440]
|
|
|
- mov r9, QWORD PTR [rdx+448]
|
|
|
- mov QWORD PTR [rcx+440], r10
|
|
|
- sbb r9, QWORD PTR [r8+448]
|
|
|
- mov r10, QWORD PTR [rdx+456]
|
|
|
- mov QWORD PTR [rcx+448], r9
|
|
|
- sbb r10, QWORD PTR [r8+456]
|
|
|
- mov r9, QWORD PTR [rdx+464]
|
|
|
- mov QWORD PTR [rcx+456], r10
|
|
|
- sbb r9, QWORD PTR [r8+464]
|
|
|
- mov r10, QWORD PTR [rdx+472]
|
|
|
- mov QWORD PTR [rcx+464], r9
|
|
|
- sbb r10, QWORD PTR [r8+472]
|
|
|
- mov r9, QWORD PTR [rdx+480]
|
|
|
- mov QWORD PTR [rcx+472], r10
|
|
|
- sbb r9, QWORD PTR [r8+480]
|
|
|
- mov r10, QWORD PTR [rdx+488]
|
|
|
- mov QWORD PTR [rcx+480], r9
|
|
|
- sbb r10, QWORD PTR [r8+488]
|
|
|
- mov r9, QWORD PTR [rdx+496]
|
|
|
- mov QWORD PTR [rcx+488], r10
|
|
|
- sbb r9, QWORD PTR [r8+496]
|
|
|
- mov r10, QWORD PTR [rdx+504]
|
|
|
- mov QWORD PTR [rcx+496], r9
|
|
|
- sbb r10, QWORD PTR [r8+504]
|
|
|
- mov QWORD PTR [rcx+504], r10 ; r[63], the top word
|
|
|
- sbb rax, rax ; return value: rax = 0 - CF, i.e. 0 when no borrow remains, all ones (-1) when a < b
|
|
|
- ret
|
|
|
-sp_4096_sub_64 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Mul a by digit b into r using MULX with ADCX/ADOX carry chains. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer. Receives 65 words (64 product words plus the carry-out word); addressed through rcx.
|
|
|
-; * a A single precision integer. 64 words read; the pointer arrives in rdx and is moved to rax.
|
|
|
-; * b A single precision digit. Arrives in r8 and is moved to rdx for mulx.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_mul_d_avx2_64 PROC
|
|
|
- push r12 ; r12 and r13 are saved here and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, rdx ; rax = a pointer; frees rdx, the implicit source operand of mulx
|
|
|
- ; A[0] * B
|
|
|
- mov rdx, r8 ; rdx = b for every mulx below
|
|
|
- xor r13, r13 ; r13 = 0 for the whole routine; also clears CF and OF ahead of the adcx/adox chains
|
|
|
- mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b (high:low)
|
|
|
- mov QWORD PTR [rcx], r11 ; r[0] = low word
|
|
|
- ; A[1] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+8] ; r10:r9 = a[1] * b; mulx does not modify flags
|
|
|
- mov r11, r13 ; next accumulator starts at 0
|
|
|
- adcx r12, r9 ; previous high word + this low word + CF
|
|
|
- adox r11, r10 ; next accumulator = this high word (+ OF); adox leaves CF alone
|
|
|
- mov QWORD PTR [rcx+8], r12 ; r[1]; r11 and r12 swap roles on every step
|
|
|
- ; A[2] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- ; A[3] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- ; A[4] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- ; A[5] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- ; A[6] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+48]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- ; A[7] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+56]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- ; A[8] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+64]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- ; A[9] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+72]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+72], r12
|
|
|
- ; A[10] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+80]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- ; A[11] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+88]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- ; A[12] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+96]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+96], r11
|
|
|
- ; A[13] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+104]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+104], r12
|
|
|
- ; A[14] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+112]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- ; A[15] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+120]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- ; A[16] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+128]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- ; A[17] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+136]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- ; A[18] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+144]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+144], r11
|
|
|
- ; A[19] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+152]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+152], r12
|
|
|
- ; A[20] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+160]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+160], r11
|
|
|
- ; A[21] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+168]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+168], r12
|
|
|
- ; A[22] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+176]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- ; A[23] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+184]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- ; A[24] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+192]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+192], r11
|
|
|
- ; A[25] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+200]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+200], r12
|
|
|
- ; A[26] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+208]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+208], r11
|
|
|
- ; A[27] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+216]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+216], r12
|
|
|
- ; A[28] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+224]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- ; A[29] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+232]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- ; A[30] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+240]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+240], r11
|
|
|
- ; A[31] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+248]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+248], r12
|
|
|
- ; A[32] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+256]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+256], r11
|
|
|
- ; A[33] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+264]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+264], r12
|
|
|
- ; A[34] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+272]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+272], r11
|
|
|
- ; A[35] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+280]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+280], r12
|
|
|
- ; A[36] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+288]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+288], r11
|
|
|
- ; A[37] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+296]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+296], r12
|
|
|
- ; A[38] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+304]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+304], r11
|
|
|
- ; A[39] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+312]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+312], r12
|
|
|
- ; A[40] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+320]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+320], r11
|
|
|
- ; A[41] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+328]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+328], r12
|
|
|
- ; A[42] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+336]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+336], r11
|
|
|
- ; A[43] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+344]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+344], r12
|
|
|
- ; A[44] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+352]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+352], r11
|
|
|
- ; A[45] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+360]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+360], r12
|
|
|
- ; A[46] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+368]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+368], r11
|
|
|
- ; A[47] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+376]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+376], r12
|
|
|
- ; A[48] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+384]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+384], r11
|
|
|
- ; A[49] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+392]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+392], r12
|
|
|
- ; A[50] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+400]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+400], r11
|
|
|
- ; A[51] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+408]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+408], r12
|
|
|
- ; A[52] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+416]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+416], r11
|
|
|
- ; A[53] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+424]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+424], r12
|
|
|
- ; A[54] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+432]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+432], r11
|
|
|
- ; A[55] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+440]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+440], r12
|
|
|
- ; A[56] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+448]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+448], r11
|
|
|
- ; A[57] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+456]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+456], r12
|
|
|
- ; A[58] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+464]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+464], r11
|
|
|
- ; A[59] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+472]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+472], r12
|
|
|
- ; A[60] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+480]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+480], r11
|
|
|
- ; A[61] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+488]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+488], r12
|
|
|
- ; A[62] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+496]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+496], r11
|
|
|
- ; A[63] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+504]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- adcx r11, r13 ; r13 is 0: folds the last CF carry into the top word
|
|
|
- mov QWORD PTR [rcx+504], r12
|
|
|
- mov QWORD PTR [rcx+512], r11 ; r[64], the carry-out word
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_mul_d_avx2_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF _WIN64
|
|
|
-; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
|
|
|
-; *
|
|
|
-; * d1 The high order half of the number to divide. Arrives in rcx.
|
|
|
-; * d0 The low order half of the number to divide. Arrives in rdx.
|
|
|
-; * div The divisor. Arrives in r8.
|
|
|
-; * returns the quotient of the division in rax (the div instruction also leaves the remainder in rdx).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-div_4096_word_asm_64 PROC
|
|
|
- mov r9, rdx ; park d0 in r9: rdx is about to be reused for the high half
|
|
|
- mov rax, r9 ; rax = d0, the low 64 bits of the 128-bit dividend
|
|
|
- mov rdx, rcx ; rdx = d1, the high 64 bits of the dividend
|
|
|
- div r8 ; unsigned rdx:rax / r8 -> quotient in rax; raises #DE if div is 0 or the quotient does not fit in 64 bits
|
|
|
- ret
|
|
|
-div_4096_word_asm_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting.
|
|
|
-; *
|
|
|
-; * r A single precision number representing condition subtract result.
|
|
|
-; * a A single precision number to subtract from.
|
|
|
-; * b A single precision number to subtract.
|
|
|
-; * m Mask value to apply.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_cond_sub_avx2_64 PROC
|
|
|
- push r12
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- pext r12, r12, r9
|
|
|
- sub r10, r12
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+56]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+104]
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+128]
|
|
|
- mov r11, QWORD PTR [rdx+128]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+136]
|
|
|
- mov r12, QWORD PTR [rdx+136]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+144]
|
|
|
- mov r10, QWORD PTR [rdx+144]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+152]
|
|
|
- mov r11, QWORD PTR [rdx+152]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+160]
|
|
|
- mov r12, QWORD PTR [rdx+160]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+168]
|
|
|
- mov r10, QWORD PTR [rdx+168]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+176]
|
|
|
- mov r11, QWORD PTR [rdx+176]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+184]
|
|
|
- mov r12, QWORD PTR [rdx+184]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+192]
|
|
|
- mov r10, QWORD PTR [rdx+192]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+200]
|
|
|
- mov r11, QWORD PTR [rdx+200]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+208]
|
|
|
- mov r12, QWORD PTR [rdx+208]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+216]
|
|
|
- mov r10, QWORD PTR [rdx+216]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+224]
|
|
|
- mov r11, QWORD PTR [rdx+224]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+232]
|
|
|
- mov r12, QWORD PTR [rdx+232]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+240]
|
|
|
- mov r10, QWORD PTR [rdx+240]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+248]
|
|
|
- mov r11, QWORD PTR [rdx+248]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+256]
|
|
|
- mov r12, QWORD PTR [rdx+256]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+264]
|
|
|
- mov r10, QWORD PTR [rdx+264]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+256], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+272]
|
|
|
- mov r11, QWORD PTR [rdx+272]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+264], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+280]
|
|
|
- mov r12, QWORD PTR [rdx+280]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+272], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+288]
|
|
|
- mov r10, QWORD PTR [rdx+288]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+280], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+296]
|
|
|
- mov r11, QWORD PTR [rdx+296]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+288], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+304]
|
|
|
- mov r12, QWORD PTR [rdx+304]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+296], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+312]
|
|
|
- mov r10, QWORD PTR [rdx+312]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+304], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+320]
|
|
|
- mov r11, QWORD PTR [rdx+320]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+312], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+328]
|
|
|
- mov r12, QWORD PTR [rdx+328]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+320], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+336]
|
|
|
- mov r10, QWORD PTR [rdx+336]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+328], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+344]
|
|
|
- mov r11, QWORD PTR [rdx+344]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+336], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+352]
|
|
|
- mov r12, QWORD PTR [rdx+352]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+344], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+360]
|
|
|
- mov r10, QWORD PTR [rdx+360]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+352], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+368]
|
|
|
- mov r11, QWORD PTR [rdx+368]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+360], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+376]
|
|
|
- mov r12, QWORD PTR [rdx+376]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+368], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+384]
|
|
|
- mov r10, QWORD PTR [rdx+384]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+376], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+392]
|
|
|
- mov r11, QWORD PTR [rdx+392]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+384], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+400]
|
|
|
- mov r12, QWORD PTR [rdx+400]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+392], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+408]
|
|
|
- mov r10, QWORD PTR [rdx+408]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+400], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+416]
|
|
|
- mov r11, QWORD PTR [rdx+416]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+408], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+424]
|
|
|
- mov r12, QWORD PTR [rdx+424]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+416], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+432]
|
|
|
- mov r10, QWORD PTR [rdx+432]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+424], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+440]
|
|
|
- mov r11, QWORD PTR [rdx+440]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+432], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+448]
|
|
|
- mov r12, QWORD PTR [rdx+448]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+440], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+456]
|
|
|
- mov r10, QWORD PTR [rdx+456]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+448], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+464]
|
|
|
- mov r11, QWORD PTR [rdx+464]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+456], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+472]
|
|
|
- mov r12, QWORD PTR [rdx+472]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+464], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+480]
|
|
|
- mov r10, QWORD PTR [rdx+480]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+472], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+488]
|
|
|
- mov r11, QWORD PTR [rdx+488]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+480], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+496]
|
|
|
- mov r12, QWORD PTR [rdx+496]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+488], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+504]
|
|
|
- mov r10, QWORD PTR [rdx+504]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+496], r12
|
|
|
- sbb r10, r11
|
|
|
- mov QWORD PTR [rcx+504], r10
|
|
|
- sbb rax, rax
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_cond_sub_avx2_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Compare a with b in constant time: 64 limbs of 64 bits, scanned from byte offset 504 down to offset 0.
|
|
|
-; * Straight-line code with no branches; the mask in r8 disables every limb below the first difference.
|
|
|
-; * a A single precision integer (limbs are read through rcx).
|
|
|
-; * b A single precision integer (limbs are read through rdx).
|
|
|
-; * return -1, 0 or 1 in rax if a is less than, equal to or greater than b
|
|
|
-; * respectively.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_cmp_64 PROC
|
|
|
- push r12 ; r12 is used as scratch below; restored by the pop before ret
|
|
|
- xor r9, r9 ; r9 = 0, the value used to clear the mask
|
|
|
- mov r8, -1 ; r8 = limb mask, all ones until a difference is found
|
|
|
- mov rax, -1 ; rax = result, starts at -1
|
|
|
- mov r10, 1 ; r10 = 1, the "a is greater" result
|
|
|
- mov r11, QWORD PTR [rcx+504] ; r11 = limb 63 of a (first limb examined)
|
|
|
- mov r12, QWORD PTR [rdx+504] ; r12 = limb 63 of b
|
|
|
- and r11, r8 ; keep the limb while the mask is all ones, zero it once the mask is cleared
|
|
|
- and r12, r8 ; with a cleared mask both limbs become 0 and compare equal
|
|
|
- sub r11, r12 ; CF set if a limb < b limb (unsigned), ZF set if equal
|
|
|
- cmova rax, r10 ; a limb > b limb: result = 1 (cmov leaves the flags untouched)
|
|
|
- cmovc rax, r8 ; a limb < b limb: result = r8, still -1 whenever CF can be set
|
|
|
- cmovnz r8, r9 ; limbs differ: clear the mask so lower limbs are ignored
|
|
|
- mov r11, QWORD PTR [rcx+496] ; limb 62, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+496]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+488] ; limb 61, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+488]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+480] ; limb 60, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+480]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+472] ; limb 59, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+472]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+464] ; limb 58, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+464]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+456] ; limb 57, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+456]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+448] ; limb 56, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+448]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+440] ; limb 55, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+440]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+432] ; limb 54, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+432]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+424] ; limb 53, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+424]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+416] ; limb 52, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+416]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+408] ; limb 51, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+408]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+400] ; limb 50, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+400]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+392] ; limb 49, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+392]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+384] ; limb 48, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+384]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+376] ; limb 47, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+376]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+368] ; limb 46, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+368]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+360] ; limb 45, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+360]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+352] ; limb 44, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+352]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+344] ; limb 43, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+344]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+336] ; limb 42, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+336]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+328] ; limb 41, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+328]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+320] ; limb 40, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+320]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+312] ; limb 39, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+312]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+304] ; limb 38, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+304]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+296] ; limb 37, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+296]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+288] ; limb 36, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+288]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+280] ; limb 35, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+280]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+272] ; limb 34, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+272]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+264] ; limb 33, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+264]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+256] ; limb 32, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+256]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+248] ; limb 31, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+248]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+240] ; limb 30, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+240]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+232] ; limb 29, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+232]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+224] ; limb 28, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+224]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+216] ; limb 27, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+216]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+208] ; limb 26, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+208]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+200] ; limb 25, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+200]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+192] ; limb 24, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+192]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+184] ; limb 23, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+184]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+176] ; limb 22, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+176]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+168] ; limb 21, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+168]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+160] ; limb 20, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+160]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+152] ; limb 19, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+152]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+144] ; limb 18, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+144]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+136] ; limb 17, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+136]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+128] ; limb 16, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+128]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+120] ; limb 15, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+112] ; limb 14, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+104] ; limb 13, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+104]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+96] ; limb 12, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+96]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+88] ; limb 11, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+80] ; limb 10, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+80]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+72] ; limb 9, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+72]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+64] ; limb 8, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+56] ; limb 7, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+48] ; limb 6, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+48]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+40] ; limb 5, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+32] ; limb 4, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+24] ; limb 3, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+16] ; limb 2, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+8] ; limb 1, same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx+8]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx] ; limb 0 (last limb examined), same masked compare as limb 63
|
|
|
- mov r12, QWORD PTR [rdx]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- xor rax, r8 ; no difference found: rax and r8 are both -1, giving 0; otherwise r8 is 0 and rax keeps 1 or -1
|
|
|
- pop r12 ; restore the r12 value saved on entry
|
|
|
- ret
|
|
|
-sp_4096_cmp_64 ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_get_from_table_64 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- pxor xmm13, xmm13
|
|
|
- pshufd xmm11, xmm11, 0
|
|
|
- pshufd xmm10, xmm10, 0
|
|
|
- ; START: 0-7
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 0-7
|
|
|
- ; START: 8-15
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 64
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 8-15
|
|
|
- ; START: 16-23
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 16-23
|
|
|
- ; START: 24-31
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 192
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 24-31
|
|
|
- ; START: 32-39
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 256
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 32-39
|
|
|
- ; START: 40-47
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 320
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 40-47
|
|
|
- ; START: 48-55
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 384
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- add rcx, 64
|
|
|
- ; END: 48-55
|
|
|
- ; START: 56-63
|
|
|
- pxor xmm13, xmm13
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- pxor xmm6, xmm6
|
|
|
- pxor xmm7, xmm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 448
|
|
|
- movdqu xmm12, xmm13
|
|
|
- pcmpeqd xmm12, xmm10
|
|
|
- movdqu xmm0, [r9]
|
|
|
- movdqu xmm1, [r9+16]
|
|
|
- movdqu xmm2, [r9+32]
|
|
|
- movdqu xmm3, [r9+48]
|
|
|
- pand xmm0, xmm12
|
|
|
- pand xmm1, xmm12
|
|
|
- pand xmm2, xmm12
|
|
|
- pand xmm3, xmm12
|
|
|
- por xmm4, xmm0
|
|
|
- por xmm5, xmm1
|
|
|
- por xmm6, xmm2
|
|
|
- por xmm7, xmm3
|
|
|
- paddd xmm13, xmm11
|
|
|
- movdqu [rcx], xmm4
|
|
|
- movdqu [rcx+16], xmm5
|
|
|
- movdqu [rcx+32], xmm6
|
|
|
- movdqu [rcx+48], xmm7
|
|
|
- ; END: 56-63
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_4096_get_from_table_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 4096 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n (n = digit size in bits, 64 here).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_mont_reduce_avx2_64 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov r9, rcx
|
|
|
- mov r10, rdx
|
|
|
- xor rbp, rbp
|
|
|
- ; i = 64
|
|
|
- mov r11, 64
|
|
|
- mov r14, QWORD PTR [r9]
|
|
|
- mov r15, QWORD PTR [r9+8]
|
|
|
- mov rdi, QWORD PTR [r9+16]
|
|
|
- mov rsi, QWORD PTR [r9+24]
|
|
|
- add r9, 256
|
|
|
- xor rbp, rbp
|
|
|
-L_4096_mont_reduce_avx2_64_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r12, r14
|
|
|
- imul rdx, r8
|
|
|
- xor rbx, rbx
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10]
|
|
|
- mov r14, r15
|
|
|
- adcx r12, rax
|
|
|
- adox r14, rcx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-224]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-216]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-208]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-216], r13
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+-200]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-208], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+-192]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-200], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+-184]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-192], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+-176]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-184], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+-168]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-176], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9+-160]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-168], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+-152]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-160], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+-144]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-152], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+-136]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-144], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9+-128]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-136], r13
|
|
|
- ; a[i+16] += m[16] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+128]
|
|
|
- mov r13, QWORD PTR [r9+-120]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-128], r12
|
|
|
- ; a[i+17] += m[17] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+136]
|
|
|
- mov r12, QWORD PTR [r9+-112]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-120], r13
|
|
|
- ; a[i+18] += m[18] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+144]
|
|
|
- mov r13, QWORD PTR [r9+-104]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-112], r12
|
|
|
- ; a[i+19] += m[19] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+152]
|
|
|
- mov r12, QWORD PTR [r9+-96]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-104], r13
|
|
|
- ; a[i+20] += m[20] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+160]
|
|
|
- mov r13, QWORD PTR [r9+-88]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-96], r12
|
|
|
- ; a[i+21] += m[21] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+168]
|
|
|
- mov r12, QWORD PTR [r9+-80]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-88], r13
|
|
|
- ; a[i+22] += m[22] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+176]
|
|
|
- mov r13, QWORD PTR [r9+-72]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-80], r12
|
|
|
- ; a[i+23] += m[23] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+184]
|
|
|
- mov r12, QWORD PTR [r9+-64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-72], r13
|
|
|
- ; a[i+24] += m[24] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+192]
|
|
|
- mov r13, QWORD PTR [r9+-56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-64], r12
|
|
|
- ; a[i+25] += m[25] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+200]
|
|
|
- mov r12, QWORD PTR [r9+-48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-56], r13
|
|
|
- ; a[i+26] += m[26] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+208]
|
|
|
- mov r13, QWORD PTR [r9+-40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-48], r12
|
|
|
- ; a[i+27] += m[27] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+216]
|
|
|
- mov r12, QWORD PTR [r9+-32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-40], r13
|
|
|
- ; a[i+28] += m[28] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+224]
|
|
|
- mov r13, QWORD PTR [r9+-24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-32], r12
|
|
|
- ; a[i+29] += m[29] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+232]
|
|
|
- mov r12, QWORD PTR [r9+-16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-24], r13
|
|
|
- ; a[i+30] += m[30] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+240]
|
|
|
- mov r13, QWORD PTR [r9+-8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-16], r12
|
|
|
- ; a[i+31] += m[31] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+248]
|
|
|
- mov r12, QWORD PTR [r9]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-8], r13
|
|
|
- ; a[i+32] += m[32] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+256]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9], r12
|
|
|
- ; a[i+33] += m[33] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+264]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+34] += m[34] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+272]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+35] += m[35] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+280]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+36] += m[36] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+288]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- ; a[i+37] += m[37] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+296]
|
|
|
- mov r12, QWORD PTR [r9+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- ; a[i+38] += m[38] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+304]
|
|
|
- mov r13, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+48], r12
|
|
|
- ; a[i+39] += m[39] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+312]
|
|
|
- mov r12, QWORD PTR [r9+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+56], r13
|
|
|
- ; a[i+40] += m[40] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+320]
|
|
|
- mov r13, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+64], r12
|
|
|
- ; a[i+41] += m[41] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+328]
|
|
|
- mov r12, QWORD PTR [r9+80]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+72], r13
|
|
|
- ; a[i+42] += m[42] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+336]
|
|
|
- mov r13, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+80], r12
|
|
|
- ; a[i+43] += m[43] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+344]
|
|
|
- mov r12, QWORD PTR [r9+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+88], r13
|
|
|
- ; a[i+44] += m[44] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+352]
|
|
|
- mov r13, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+96], r12
|
|
|
- ; a[i+45] += m[45] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+360]
|
|
|
- mov r12, QWORD PTR [r9+112]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+104], r13
|
|
|
- ; a[i+46] += m[46] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+368]
|
|
|
- mov r13, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+112], r12
|
|
|
- ; a[i+47] += m[47] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+376]
|
|
|
- mov r12, QWORD PTR [r9+128]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+120], r13
|
|
|
- ; a[i+48] += m[48] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+384]
|
|
|
- mov r13, QWORD PTR [r9+136]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+128], r12
|
|
|
- ; a[i+49] += m[49] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+392]
|
|
|
- mov r12, QWORD PTR [r9+144]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+136], r13
|
|
|
- ; a[i+50] += m[50] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+400]
|
|
|
- mov r13, QWORD PTR [r9+152]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+144], r12
|
|
|
- ; a[i+51] += m[51] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+408]
|
|
|
- mov r12, QWORD PTR [r9+160]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+152], r13
|
|
|
- ; a[i+52] += m[52] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+416]
|
|
|
- mov r13, QWORD PTR [r9+168]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+160], r12
|
|
|
- ; a[i+53] += m[53] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+424]
|
|
|
- mov r12, QWORD PTR [r9+176]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+168], r13
|
|
|
- ; a[i+54] += m[54] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+432]
|
|
|
- mov r13, QWORD PTR [r9+184]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+176], r12
|
|
|
- ; a[i+55] += m[55] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+440]
|
|
|
- mov r12, QWORD PTR [r9+192]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+184], r13
|
|
|
- ; a[i+56] += m[56] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+448]
|
|
|
- mov r13, QWORD PTR [r9+200]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+192], r12
|
|
|
- ; a[i+57] += m[57] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+456]
|
|
|
- mov r12, QWORD PTR [r9+208]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+200], r13
|
|
|
- ; a[i+58] += m[58] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+464]
|
|
|
- mov r13, QWORD PTR [r9+216]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+208], r12
|
|
|
- ; a[i+59] += m[59] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+472]
|
|
|
- mov r12, QWORD PTR [r9+224]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+216], r13
|
|
|
- ; a[i+60] += m[60] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+480]
|
|
|
- mov r13, QWORD PTR [r9+232]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+224], r12
|
|
|
- ; a[i+61] += m[61] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+488]
|
|
|
- mov r12, QWORD PTR [r9+240]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+232], r13
|
|
|
- ; a[i+62] += m[62] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+496]
|
|
|
- mov r13, QWORD PTR [r9+248]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+240], r12
|
|
|
- ; a[i+63] += m[63] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+504]
|
|
|
- mov r12, QWORD PTR [r9+256]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+248], r13
|
|
|
- adcx r12, rbp
|
|
|
- mov rbp, rbx
|
|
|
- mov QWORD PTR [r9+256], r12
|
|
|
- adox rbp, rbx
|
|
|
- adcx rbp, rbx
|
|
|
- ; a += 1
|
|
|
- add r9, 8
|
|
|
- ; i -= 1
|
|
|
- sub r11, 1
|
|
|
- jnz L_4096_mont_reduce_avx2_64_loop
|
|
|
- sub r9, 256
|
|
|
- neg rbp
|
|
|
- mov r8, r9
|
|
|
- sub r9, 512
|
|
|
- mov rcx, QWORD PTR [r10]
|
|
|
- mov rdx, r14
|
|
|
- pext rcx, rcx, rbp
|
|
|
- sub rdx, rcx
|
|
|
- mov rcx, QWORD PTR [r10+8]
|
|
|
- mov rax, r15
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+16]
|
|
|
- mov rcx, rdi
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+8], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov rdx, rsi
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+16], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+24], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+40]
|
|
|
- mov rcx, QWORD PTR [r8+40]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+32], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- mov rdx, QWORD PTR [r8+48]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+40], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+56]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+48], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+64]
|
|
|
- mov rcx, QWORD PTR [r8+64]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+56], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov rdx, QWORD PTR [r8+72]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+64], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+72], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+88]
|
|
|
- mov rcx, QWORD PTR [r8+88]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+80], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- mov rdx, QWORD PTR [r8+96]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+88], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+104]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+96], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+112]
|
|
|
- mov rcx, QWORD PTR [r8+112]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+104], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov rdx, QWORD PTR [r8+120]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+112], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+128]
|
|
|
- mov rax, QWORD PTR [r8+128]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+120], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+136]
|
|
|
- mov rcx, QWORD PTR [r8+136]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+128], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+144]
|
|
|
- mov rdx, QWORD PTR [r8+144]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+136], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+152]
|
|
|
- mov rax, QWORD PTR [r8+152]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+144], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+160]
|
|
|
- mov rcx, QWORD PTR [r8+160]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+152], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+168]
|
|
|
- mov rdx, QWORD PTR [r8+168]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+160], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+176]
|
|
|
- mov rax, QWORD PTR [r8+176]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+168], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+184]
|
|
|
- mov rcx, QWORD PTR [r8+184]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+176], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+192]
|
|
|
- mov rdx, QWORD PTR [r8+192]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+184], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+200]
|
|
|
- mov rax, QWORD PTR [r8+200]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+192], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+208]
|
|
|
- mov rcx, QWORD PTR [r8+208]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+200], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+216]
|
|
|
- mov rdx, QWORD PTR [r8+216]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+208], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+224]
|
|
|
- mov rax, QWORD PTR [r8+224]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+216], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+232]
|
|
|
- mov rcx, QWORD PTR [r8+232]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+224], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+240]
|
|
|
- mov rdx, QWORD PTR [r8+240]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+232], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+248]
|
|
|
- mov rax, QWORD PTR [r8+248]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+240], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+256]
|
|
|
- mov rcx, QWORD PTR [r8+256]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+248], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+264]
|
|
|
- mov rdx, QWORD PTR [r8+264]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+256], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+272]
|
|
|
- mov rax, QWORD PTR [r8+272]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+264], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+280]
|
|
|
- mov rcx, QWORD PTR [r8+280]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+272], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+288]
|
|
|
- mov rdx, QWORD PTR [r8+288]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+280], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+296]
|
|
|
- mov rax, QWORD PTR [r8+296]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+288], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+304]
|
|
|
- mov rcx, QWORD PTR [r8+304]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+296], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+312]
|
|
|
- mov rdx, QWORD PTR [r8+312]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+304], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+320]
|
|
|
- mov rax, QWORD PTR [r8+320]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+312], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+328]
|
|
|
- mov rcx, QWORD PTR [r8+328]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+320], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+336]
|
|
|
- mov rdx, QWORD PTR [r8+336]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+328], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+344]
|
|
|
- mov rax, QWORD PTR [r8+344]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+336], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+352]
|
|
|
- mov rcx, QWORD PTR [r8+352]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+344], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+360]
|
|
|
- mov rdx, QWORD PTR [r8+360]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+352], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+368]
|
|
|
- mov rax, QWORD PTR [r8+368]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+360], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+376]
|
|
|
- mov rcx, QWORD PTR [r8+376]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+368], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+384]
|
|
|
- mov rdx, QWORD PTR [r8+384]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+376], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+392]
|
|
|
- mov rax, QWORD PTR [r8+392]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+384], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+400]
|
|
|
- mov rcx, QWORD PTR [r8+400]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+392], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+408]
|
|
|
- mov rdx, QWORD PTR [r8+408]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+400], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+416]
|
|
|
- mov rax, QWORD PTR [r8+416]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+408], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+424]
|
|
|
- mov rcx, QWORD PTR [r8+424]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+416], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+432]
|
|
|
- mov rdx, QWORD PTR [r8+432]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+424], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+440]
|
|
|
- mov rax, QWORD PTR [r8+440]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+432], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+448]
|
|
|
- mov rcx, QWORD PTR [r8+448]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+440], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+456]
|
|
|
- mov rdx, QWORD PTR [r8+456]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+448], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+464]
|
|
|
- mov rax, QWORD PTR [r8+464]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+456], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+472]
|
|
|
- mov rcx, QWORD PTR [r8+472]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+464], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+480]
|
|
|
- mov rdx, QWORD PTR [r8+480]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+472], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+488]
|
|
|
- mov rax, QWORD PTR [r8+488]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+480], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+496]
|
|
|
- mov rcx, QWORD PTR [r8+496]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+488], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+504]
|
|
|
- mov rdx, QWORD PTR [r8+504]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+496], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov QWORD PTR [r9+504], rdx
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_mont_reduce_avx2_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_get_from_table_avx2_64 PROC
|
|
|
- sub rsp, 128
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- mov rax, 1
|
|
|
- movd xmm10, r8
|
|
|
- movd xmm11, rax
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpermd ymm10, ymm13, ymm10
|
|
|
- vpermd ymm11, ymm13, ymm11
|
|
|
- ; START: 0-15
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- add rcx, 128
|
|
|
- ; END: 0-15
|
|
|
- ; START: 16-31
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 128
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- add rcx, 128
|
|
|
- ; END: 16-31
|
|
|
- ; START: 32-47
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 256
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- add rcx, 128
|
|
|
- ; END: 32-47
|
|
|
- ; START: 48-63
|
|
|
- vpxor ymm13, ymm13, ymm13
|
|
|
- vpxor ymm4, ymm4, ymm4
|
|
|
- vpxor ymm5, ymm5, ymm5
|
|
|
- vpxor ymm6, ymm6, ymm6
|
|
|
- vpxor ymm7, ymm7, ymm7
|
|
|
- ; ENTRY: 0
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 1
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 2
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 3
|
|
|
- mov r9, QWORD PTR [rdx+24]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 4
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 5
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 6
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 7
|
|
|
- mov r9, QWORD PTR [rdx+56]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 8
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 9
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 10
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 11
|
|
|
- mov r9, QWORD PTR [rdx+88]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 12
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 13
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 14
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- ; ENTRY: 15
|
|
|
- mov r9, QWORD PTR [rdx+120]
|
|
|
- add r9, 384
|
|
|
- vpcmpeqd ymm12, ymm13, ymm10
|
|
|
- vmovdqu ymm0, YMMWORD PTR [r9]
|
|
|
- vmovdqu ymm1, YMMWORD PTR [r9+32]
|
|
|
- vmovdqu ymm2, YMMWORD PTR [r9+64]
|
|
|
- vmovdqu ymm3, YMMWORD PTR [r9+96]
|
|
|
- vpand ymm0, ymm0, ymm12
|
|
|
- vpand ymm1, ymm1, ymm12
|
|
|
- vpand ymm2, ymm2, ymm12
|
|
|
- vpand ymm3, ymm3, ymm12
|
|
|
- vpor ymm4, ymm4, ymm0
|
|
|
- vpor ymm5, ymm5, ymm1
|
|
|
- vpor ymm6, ymm6, ymm2
|
|
|
- vpor ymm7, ymm7, ymm3
|
|
|
- vpaddd ymm13, ymm13, ymm11
|
|
|
- vmovdqu YMMWORD PTR [rcx], ymm4
|
|
|
- vmovdqu YMMWORD PTR [rcx+32], ymm5
|
|
|
- vmovdqu YMMWORD PTR [rcx+64], ymm6
|
|
|
- vmovdqu YMMWORD PTR [rcx+96], ymm7
|
|
|
- ; END: 48-63
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp]
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- add rsp, 128
|
|
|
- ret
|
|
|
-sp_4096_get_from_table_avx2_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Masked addition over 32 words of 64 bits: r = a + (b AND m).
|
|
|
-; * m is -1 to add b and 0 to add nothing.
|
|
|
-; *
|
|
|
-; * r Destination of the sum, 32 words (rcx).
|
|
|
-; * a First operand, 32 words (rdx).
|
|
|
-; * b Second operand, 32 words, each word ANDed with m (r8).
|
|
|
-; * m Mask value applied to b (r9).
|
|
|
-; *
|
|
|
-; * rax returns the carry out of the most significant word (0 or 1).
|
|
|
-; * The REPT blocks expand to a fully unrolled instruction sequence.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_cond_add_32 PROC
|
|
|
- sub rsp, 256
|
|
|
- mov rax, 0
|
|
|
- ; Stage 1: stack[i] = b[i] AND m, two words per repetition.
|
|
|
-sp_4096_cond_add_32_off = 0
|
|
|
-REPT 16
|
|
|
- mov r10, QWORD PTR [r8+sp_4096_cond_add_32_off]
|
|
|
- mov r11, QWORD PTR [r8+sp_4096_cond_add_32_off+8]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+sp_4096_cond_add_32_off], r10
|
|
|
- mov QWORD PTR [rsp+sp_4096_cond_add_32_off+8], r11
|
|
|
-sp_4096_cond_add_32_off = sp_4096_cond_add_32_off + 16
|
|
|
-ENDM
|
|
|
- ; Stage 2: r = a + stack. All of b has been read, so r8 becomes scratch.
|
|
|
- ; ADD starts the carry chain; MOV leaves the flags alone between ADCs.
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- mov r8, QWORD PTR [rsp]
|
|
|
- add r10, r8
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [rsp+8]
|
|
|
- adc r11, r8
|
|
|
- ; Each repetition stores the two pending sums and forms the next two.
|
|
|
-sp_4096_cond_add_32_off = 0
|
|
|
-REPT 15
|
|
|
- mov QWORD PTR [rcx+sp_4096_cond_add_32_off], r10
|
|
|
- mov r10, QWORD PTR [rdx+sp_4096_cond_add_32_off+16]
|
|
|
- mov r8, QWORD PTR [rsp+sp_4096_cond_add_32_off+16]
|
|
|
- adc r10, r8
|
|
|
- mov QWORD PTR [rcx+sp_4096_cond_add_32_off+8], r11
|
|
|
- mov r11, QWORD PTR [rdx+sp_4096_cond_add_32_off+24]
|
|
|
- mov r8, QWORD PTR [rsp+sp_4096_cond_add_32_off+24]
|
|
|
- adc r11, r8
|
|
|
-sp_4096_cond_add_32_off = sp_4096_cond_add_32_off + 16
|
|
|
-ENDM
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- ; Fold the final carry into the return value.
|
|
|
- adc rax, 0
|
|
|
- add rsp, 256
|
|
|
- ret
|
|
|
-sp_4096_cond_add_32 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Masked addition over 32 words of 64 bits: r = a + (b masked by m).
|
|
|
-; * m is -1 to add b and 0 to add nothing.
|
|
|
-; *
|
|
|
-; * r Destination of the sum, 32 words (rcx).
|
|
|
-; * a First operand, 32 words (rdx).
|
|
|
-; * b Second operand, 32 words, each word masked with m (r8).
|
|
|
-; * m Mask value applied to b (r9).
|
|
|
-; *
|
|
|
-; * rax returns the carry out of the most significant word (0 or 1).
|
|
|
-; * PEXT does the masking: with m = -1 it yields the source word and with
|
|
|
-; * m = 0 it yields zero. PEXT does not modify the flags, so it can sit
|
|
|
-; * inside the ADD/ADC carry chain.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_cond_add_avx2_32 PROC
|
|
|
- push r12
|
|
|
- mov rax, 0
|
|
|
- ; Word 0 starts the carry chain with ADD.
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- pext r12, r12, r9
|
|
|
- add r10, r12
|
|
|
- ; Words 1..30: r10, r11 and r12 rotate roles every three words, and each
|
|
|
- ; step stores the previous word's sum before its own ADC.
|
|
|
-sp_4096_cond_add_avx2_32_off = 8
|
|
|
-REPT 10
|
|
|
- mov r12, QWORD PTR [r8+sp_4096_cond_add_avx2_32_off]
|
|
|
- mov r11, QWORD PTR [rdx+sp_4096_cond_add_avx2_32_off]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+sp_4096_cond_add_avx2_32_off-8], r10
|
|
|
- adc r11, r12
|
|
|
- mov r10, QWORD PTR [r8+sp_4096_cond_add_avx2_32_off+8]
|
|
|
- mov r12, QWORD PTR [rdx+sp_4096_cond_add_avx2_32_off+8]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+sp_4096_cond_add_avx2_32_off], r11
|
|
|
- adc r12, r10
|
|
|
- mov r11, QWORD PTR [r8+sp_4096_cond_add_avx2_32_off+16]
|
|
|
- mov r10, QWORD PTR [rdx+sp_4096_cond_add_avx2_32_off+16]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+sp_4096_cond_add_avx2_32_off+8], r12
|
|
|
- adc r10, r11
|
|
|
-sp_4096_cond_add_avx2_32_off = sp_4096_cond_add_avx2_32_off + 24
|
|
|
-ENDM
|
|
|
- ; Word 31 closes the chain.
|
|
|
- mov r12, QWORD PTR [r8+248]
|
|
|
- mov r11, QWORD PTR [rdx+248]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- adc r11, r12
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- ; Fold the final carry into the return value.
|
|
|
- adc rax, 0
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_cond_add_avx2_32 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Shift number left by n bits. (r = a << n)
|
|
|
-; *
|
|
|
-; * r Result of left shift by n (rcx). Words r[0]..r[64] are written.
|
|
|
-; * a Number to shift (rdx). Words a[0]..a[63] are read.
|
|
|
-; * n Amount to shift (low byte of r8), used as the CL count of SHLD/SHL.
|
|
|
-; *
|
|
|
-; * NOTE(review): presumably 0 <= n <= 63 - confirm against callers.
|
|
|
-; * The REPT block expands to a fully unrolled instruction sequence.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_4096_lshift_64 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- ; Copy r out of rcx BEFORE cl receives n: writing cl replaces the low
|
|
|
- ; byte of rcx, so copying afterwards would corrupt the result pointer.
|
|
|
- mov rax, rcx
|
|
|
- mov cl, r8b
|
|
|
- mov r12, 0
|
|
|
- ; Top group: a[59..63] give r[60..64]; r12 collects the bits shifted
|
|
|
- ; out of a[63]. r13 keeps a[59] unshifted for the next group.
|
|
|
- mov r13, QWORD PTR [rdx+472]
|
|
|
- mov r8, QWORD PTR [rdx+480]
|
|
|
- mov r9, QWORD PTR [rdx+488]
|
|
|
- mov r10, QWORD PTR [rdx+496]
|
|
|
- mov r11, QWORD PTR [rdx+504]
|
|
|
- shld r12, r11, cl
|
|
|
- shld r11, r10, cl
|
|
|
- shld r10, r9, cl
|
|
|
- shld r9, r8, cl
|
|
|
- shld r8, r13, cl
|
|
|
- mov QWORD PTR [rax+480], r8
|
|
|
- mov QWORD PTR [rax+488], r9
|
|
|
- mov QWORD PTR [rax+496], r10
|
|
|
- mov QWORD PTR [rax+504], r11
|
|
|
- mov QWORD PTR [rax+512], r12
|
|
|
- ; Middle groups, four words each, from high to low. r13 and r11 take
|
|
|
- ; turns carrying the still-unshifted word across the group boundary.
|
|
|
-sp_4096_lshift_64_off = 448
|
|
|
-REPT 7
|
|
|
- mov r11, QWORD PTR [rdx+sp_4096_lshift_64_off-8]
|
|
|
- mov r8, QWORD PTR [rdx+sp_4096_lshift_64_off]
|
|
|
- mov r9, QWORD PTR [rdx+sp_4096_lshift_64_off+8]
|
|
|
- mov r10, QWORD PTR [rdx+sp_4096_lshift_64_off+16]
|
|
|
- shld r13, r10, cl
|
|
|
- shld r10, r9, cl
|
|
|
- shld r9, r8, cl
|
|
|
- shld r8, r11, cl
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off], r8
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off+8], r9
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off+16], r10
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off+24], r13
|
|
|
- mov r13, QWORD PTR [rdx+sp_4096_lshift_64_off-40]
|
|
|
- mov r8, QWORD PTR [rdx+sp_4096_lshift_64_off-32]
|
|
|
- mov r9, QWORD PTR [rdx+sp_4096_lshift_64_off-24]
|
|
|
- mov r10, QWORD PTR [rdx+sp_4096_lshift_64_off-16]
|
|
|
- shld r11, r10, cl
|
|
|
- shld r10, r9, cl
|
|
|
- shld r9, r8, cl
|
|
|
- shld r8, r13, cl
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off-32], r8
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off-24], r9
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off-16], r10
|
|
|
- mov QWORD PTR [rax+sp_4096_lshift_64_off-8], r11
|
|
|
-sp_4096_lshift_64_off = sp_4096_lshift_64_off - 64
|
|
|
-ENDM
|
|
|
- ; Bottom group: a[0..2] plus the carried a[3] in r13 give r[0..3].
|
|
|
- ; The lowest word has nothing below it, so it uses a plain SHL.
|
|
|
- mov r8, QWORD PTR [rdx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- shld r13, r10, cl
|
|
|
- shld r10, r9, cl
|
|
|
- shld r9, r8, cl
|
|
|
- shl r8, cl
|
|
|
- mov QWORD PTR [rax], r8
|
|
|
- mov QWORD PTR [rax+8], r9
|
|
|
- mov QWORD PTR [rax+16], r10
|
|
|
- mov QWORD PTR [rax+24], r13
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_4096_lshift_64 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFNDEF WOLFSSL_SP_NO_256
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; * Column-by-column schoolbook 4x4 word product; arguments arrive in rcx (r), rdx (a) and r8 (b).
|
|
|
-; * r A single precision integer. Receives all eight 64-bit product words.
|
|
|
-; * a A single precision integer. Four 64-bit words are read.
|
|
|
-; * b A single precision integer. Four 64-bit words are read.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mul_4 PROC
|
|
|
- push r12 ; r12 is the third rotating accumulator, restored before ret
|
|
|
- mov r9, rdx ; keep a in r9: mul overwrites rdx with the high product word
|
|
|
- sub rsp, 32 ; four-word scratch for result words 0..3
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- mov QWORD PTR [rsp], rax ; result word 0 -> scratch
|
|
|
- mov r11, rdx
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+8], r11 ; result word 1 -> scratch
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+16], r12 ; result word 2 -> scratch
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+24], r10 ; result word 3 -> scratch
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+32], r11 ; words 4..7 go straight to r
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rsp] ; copy the staged low half out last
|
|
|
- mov rdx, QWORD PTR [rsp+8] ; NOTE(review): staging presumably permits r == a or r == b - confirm with callers
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- add rsp, 32
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mul_4 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; * MULX/ADCX/ADOX variant: one row of four partial products per word of a, summed on two independent carry chains (CF and OF).
|
|
|
-; * r Result of multiplication. Eight 64-bit words are written (rcx).
|
|
|
-; * a First number to multiply. Four 64-bit words (rdx on entry, moved to rax).
|
|
|
-; * b Second number to multiply. Four 64-bit words (r8 on entry, moved to rbp).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mul_avx2_4 PROC
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov rbp, r8 ; rbp = b (r8 becomes result word 0)
|
|
|
- mov rax, rdx ; rax = a (rdx is the implicit MULX multiplicand)
|
|
|
- mov rdx, QWORD PTR [rax] ; rdx = a[0]
|
|
|
- mov r14, QWORD PTR [rbp+8] ; cache b[1] for the first three rows
|
|
|
- ; A[0] * B[0]
|
|
|
- mulx r9, r8, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx ; rbx = 0 and CF = OF = 0 (MULX leaves flags alone)
|
|
|
- ; A[0] * B[1]
|
|
|
- mulx r10, rdi, r14
|
|
|
- adcx r9, rdi
|
|
|
- ; A[0] * B[2]
|
|
|
- mulx r11, rdi, QWORD PTR [rbp+16]
|
|
|
- adcx r10, rdi
|
|
|
- ; A[0] * B[3]
|
|
|
- mulx r12, rdi, QWORD PTR [rbp+24]
|
|
|
- adcx r11, rdi
|
|
|
- mov rdx, QWORD PTR [rax+8] ; rdx = a[1]; mov does not disturb CF
|
|
|
- adcx r12, rbx ; fold the final carry of row 0
|
|
|
- ; A[1] * B[0]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx ; restart both carry chains for this row
|
|
|
- adcx r9, rdi
|
|
|
- ; A[1] * B[1]
|
|
|
- mulx r15, rdi, r14
|
|
|
- adox r10, rsi ; OF chain adds high halves, CF chain adds low halves
|
|
|
- adcx r10, rdi
|
|
|
- ; A[1] * B[2]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp+16]
|
|
|
- adox r11, r15
|
|
|
- adcx r11, rdi
|
|
|
- ; A[1] * B[3]
|
|
|
- mulx r13, rdi, QWORD PTR [rbp+24]
|
|
|
- adox r12, rsi
|
|
|
- adcx r12, rdi
|
|
|
- adox r13, rbx
|
|
|
- mov rdx, QWORD PTR [rax+16] ; rdx = a[2]
|
|
|
- adcx r13, rbx
|
|
|
- ; A[2] * B[0]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx
|
|
|
- adcx r10, rdi
|
|
|
- ; A[2] * B[1]
|
|
|
- mulx r15, rdi, r14
|
|
|
- adox r11, rsi
|
|
|
- adcx r11, rdi
|
|
|
- ; A[2] * B[2]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp+16]
|
|
|
- adox r12, r15
|
|
|
- adcx r12, rdi
|
|
|
- ; A[2] * B[3]
|
|
|
- mulx r14, rdi, QWORD PTR [rbp+24] ; r14 becomes result word 6; cached b[1] is gone from here on
|
|
|
- adox r13, rsi
|
|
|
- adcx r13, rdi
|
|
|
- adox r14, rbx
|
|
|
- mov rdx, QWORD PTR [rax+24] ; rdx = a[3]
|
|
|
- adcx r14, rbx
|
|
|
- ; A[3] * B[0]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx
|
|
|
- adcx r11, rdi
|
|
|
- ; A[3] * B[1]
|
|
|
- mulx r15, rdi, QWORD PTR [rbp+8] ; b[1] reloaded from memory because r14 was reused
|
|
|
- adox r12, rsi
|
|
|
- adcx r12, rdi
|
|
|
- ; A[3] * B[2]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp+16]
|
|
|
- adox r13, r15
|
|
|
- adcx r13, rdi
|
|
|
- ; A[3] * B[3]
|
|
|
- mulx r15, rdi, QWORD PTR [rbp+24]
|
|
|
- adox r14, rsi
|
|
|
- adcx r14, rdi
|
|
|
- adox r15, rbx
|
|
|
- adcx r15, rbx
|
|
|
- mov QWORD PTR [rcx], r8 ; r8..r15 = result words 0..7
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov QWORD PTR [rcx+32], r12
|
|
|
- mov QWORD PTR [rcx+40], r13
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- mov QWORD PTR [rcx+56], r15
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- ret
|
|
|
-sp_256_mul_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * Column-by-column squaring: each off-diagonal product A[i] * A[j] (i != j)
|
|
|
-; * is computed once and accumulated twice; diagonal products are added once.
|
|
|
-; * Result words 0..3 are staged in a 32-byte stack scratch area and copied
|
|
|
-; * to r last; words 4..7 are written to r directly.
|
|
|
-; *
|
|
|
-; * Only volatile registers are used (rax, rdx, r8-r11), so nothing is pushed.
|
|
|
-; *
|
|
|
-; * r A single precision integer. Receives eight 64-bit words (rcx).
|
|
|
-; * a A single precision integer. Four 64-bit words are read (rdx).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_sqr_4 PROC
|
|
|
- mov r8, rdx ; keep a in r8: mul overwrites rdx
|
|
|
- sub rsp, 32 ; scratch for result words 0..3
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax
|
|
|
- xor r11, r11
|
|
|
- mov QWORD PTR [rsp], rax
|
|
|
- mov r10, rdx
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax ; second accumulation doubles the cross product
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+8], r10
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+16], r11
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+24], r9
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- ; Copy the staged low half to r. r10 and r11 are dead here (their last
|
|
|
- ; values were stored to r[7] and r[5] above), so they carry words 2 and 3.
|
|
|
- mov rax, QWORD PTR [rsp]
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- add rsp, 32
|
|
|
- ret
|
|
|
-sp_256_sqr_4 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; * MULX/ADCX/ADOX variant: the six off-diagonal products are summed first, then doubled on the CF chain while the four squares are added on the OF chain.
|
|
|
-; * r Result of squaring. Eight 64-bit words are written (rcx).
|
|
|
-; * a Number to square in Montgomery form. Four 64-bit words (rdx on entry, moved to rax); the code itself is a plain integer square.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_sqr_avx2_4 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov rax, rdx ; rax = a (rdx is the implicit MULX multiplicand)
|
|
|
- xor r8, r8 ; clears CF and OF before the first adox/adcx
|
|
|
- mov rdx, QWORD PTR [rax] ; rdx = a[0]
|
|
|
- mov rsi, QWORD PTR [rax+8] ; rsi = a[1]
|
|
|
- mov rbx, QWORD PTR [rax+16] ; rbx = a[2]
|
|
|
- mov r15, QWORD PTR [rax+24] ; r15 = a[3]
|
|
|
- ; A[0] * A[1]
|
|
|
- mulx r10, r9, rsi
|
|
|
- ; A[0] * A[2]
|
|
|
- mulx r11, r8, rbx
|
|
|
- adox r10, r8
|
|
|
- ; A[0] * A[3]
|
|
|
- mulx r12, r8, r15
|
|
|
- mov rdx, rsi ; multiplicand = a[1]
|
|
|
- adox r11, r8
|
|
|
- ; A[1] * A[2]
|
|
|
- mulx rdi, r8, rbx
|
|
|
- mov rdx, r15 ; multiplicand = a[3]
|
|
|
- adcx r11, r8
|
|
|
- ; A[1] * A[3]
|
|
|
- mulx r13, r8, rsi
|
|
|
- mov r15, 0 ; mov (not xor) so the live CF/OF chains survive
|
|
|
- adox r12, rdi
|
|
|
- adcx r12, r8
|
|
|
- ; A[2] * A[3]
|
|
|
- mulx r14, r8, rbx
|
|
|
- adox r13, r15
|
|
|
- adcx r13, r8
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15 ; r9..r14 now hold the cross-product sum (words 1..6)
|
|
|
- ; Double with Carry Flag
|
|
|
- xor r15, r15 ; r15 = 0, CF = OF = 0
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rdx, QWORD PTR [rax]
|
|
|
- mulx rdi, r8, rdx ; r8 = result word 0
|
|
|
- adcx r9, r9 ; CF chain: x + x doubles each word
|
|
|
- adcx r10, r10
|
|
|
- adox r9, rdi ; OF chain: add the squares
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rdx, QWORD PTR [rax+8]
|
|
|
- mulx rbx, rsi, rdx
|
|
|
- adcx r11, r11
|
|
|
- adox r10, rsi
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rdx, QWORD PTR [rax+16]
|
|
|
- mulx rsi, rdi, rdx
|
|
|
- adcx r12, r12
|
|
|
- adox r11, rbx
|
|
|
- adcx r13, r13
|
|
|
- adox r12, rdi
|
|
|
- adcx r14, r14
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rdx, QWORD PTR [rax+24]
|
|
|
- mulx rbx, rdi, rdx
|
|
|
- adox r13, rsi
|
|
|
- adcx r15, r15 ; r15 = carry out of the doubling
|
|
|
- adox r14, rdi
|
|
|
- adox r15, rbx
|
|
|
- mov QWORD PTR [rcx], r8 ; r8..r15 = result words 0..7
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov QWORD PTR [rcx+32], r12
|
|
|
- mov QWORD PTR [rcx+40], r13
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- mov QWORD PTR [rcx+56], r15
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_sqr_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Add b to a into r. (r = a + b)
|
|
|
-; *
|
|
|
-; * All four words of a are loaded before anything is stored to r.
|
|
|
-; * Only volatile registers are used: the pointer register rdx is recycled
|
|
|
-; * to hold a[3] once it is no longer needed as an address, so nothing is
|
|
|
-; * pushed.
|
|
|
-; *
|
|
|
-; * r A single precision integer. Receives four 64-bit words (rcx).
|
|
|
-; * a A single precision integer. Four 64-bit words (rdx).
|
|
|
-; * b A single precision integer. Four 64-bit words (r8).
|
|
|
-; * return rax = carry out of the top word (0 or 1).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_add_4 PROC
|
|
|
- xor rax, rax ; carry accumulator, cleared before the flag chain starts
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov r11, QWORD PTR [rdx+16]
|
|
|
- mov rdx, QWORD PTR [rdx+24] ; last use of the pointer: rdx = a[3]
|
|
|
- add r9, QWORD PTR [r8]
|
|
|
- adc r10, QWORD PTR [r8+8]
|
|
|
- adc r11, QWORD PTR [r8+16]
|
|
|
- adc rdx, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [rcx], r9 ; mov leaves CF intact for the final adc
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- mov QWORD PTR [rcx+24], rdx
|
|
|
- adc rax, 0 ; rax = carry out
|
|
|
- ret
|
|
|
-sp_256_add_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into r. (r = a - b)
|
|
|
-; *
|
|
|
-; * All four words of a are loaded before anything is stored to r.
|
|
|
-; * Only volatile registers are used: the pointer register rdx is recycled
|
|
|
-; * to hold a[3] once it is no longer needed as an address, so nothing is
|
|
|
-; * pushed.
|
|
|
-; *
|
|
|
-; * r A single precision integer. Receives four 64-bit words (rcx).
|
|
|
-; * a A single precision integer. Four 64-bit words (rdx).
|
|
|
-; * b A single precision integer. Four 64-bit words (r8).
|
|
|
-; * return rax = 0 when there was no borrow, -1 (all ones) when a < b.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_sub_4 PROC
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov r11, QWORD PTR [rdx+16]
|
|
|
- mov rdx, QWORD PTR [rdx+24] ; last use of the pointer: rdx = a[3]
|
|
|
- sub r9, QWORD PTR [r8]
|
|
|
- sbb r10, QWORD PTR [r8+8]
|
|
|
- sbb r11, QWORD PTR [r8+16]
|
|
|
- sbb rdx, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [rcx], r9 ; mov leaves CF intact for the final sbb
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- mov QWORD PTR [rcx+24], rdx
|
|
|
- sbb rax, rax ; rax = -borrow; independent of rax's previous value
|
|
|
- ret
|
|
|
-sp_256_sub_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Conditionally copy a into r using the mask m.
|
|
|
-; * m is -1 to copy and 0 when not.
|
|
|
-; *
|
|
|
-; * Branch-free select: r ^= (r ^ a) & m, word by word. With m = -1 each
|
|
|
-; * word of r becomes the matching word of a; with m = 0 r is rewritten
|
|
|
-; * with its own value.
|
|
|
-; *
|
|
|
-; * r A single precision number to copy over. Four 64-bit words (rcx).
|
|
|
-; * a A single precision number to copy. Four 64-bit words (rdx).
|
|
|
-; * m Mask value to apply (r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_cond_copy_4 PROC
|
|
|
- ; Difference words a[i] ^ r[i]; every load finishes before the first store.
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- xor rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- xor r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- xor r10, QWORD PTR [rcx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- xor r11, QWORD PTR [rcx+24]
|
|
|
- ; Keep the difference only where the mask is set, then fold it into r.
|
|
|
- and rax, r8
|
|
|
- xor QWORD PTR [rcx], rax
|
|
|
- and r9, r8
|
|
|
- xor QWORD PTR [rcx+8], r9
|
|
|
- and r10, r8
|
|
|
- xor QWORD PTR [rcx+16], r10
|
|
|
- and r11, r8
|
|
|
- xor QWORD PTR [rcx+24], r11
|
|
|
- ret
|
|
|
-sp_256_cond_copy_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Multiply two Montgomery form numbers mod the modulus (prime).
|
|
|
-; * (r = a * b mod m)
|
|
|
-; * Full 8-word product in r11,r12,r13,r14,r15,rdi,rsi,rbx (low to high), then a shift/add reduction hard-wired to the modulus words -1, 2^32-1, 0, 0xFFFFFFFF00000001.
|
|
|
-; * r Result of multiplication. Four 64-bit words are written (rcx).
|
|
|
-; * a First number to multiply in Montgomery form. (rdx on entry, moved to r10)
|
|
|
-; * b Second number to multiply in Montgomery form. (r8)
|
|
|
-; * m Modulus (prime). Not read: r9 is overwritten as scratch before any use.
|
|
|
-; * mp Montgomery multiplier. Not read by this routine.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_mul_4 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov r10, rdx ; keep a in r10: mul overwrites rdx
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r10]
|
|
|
- mov r11, rax
|
|
|
- mov r12, rdx
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r10]
|
|
|
- xor r13, r13
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r10+8]
|
|
|
- xor r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r10]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r10+8]
|
|
|
- xor r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r10+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r10]
|
|
|
- xor rdi, rdi
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc rdi, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r10+8]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc rdi, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r10+16]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc rdi, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r10+24]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc rdi, 0
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r10+8]
|
|
|
- xor rsi, rsi
|
|
|
- add r15, rax
|
|
|
- adc rdi, rdx
|
|
|
- adc rsi, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r10+16]
|
|
|
- add r15, rax
|
|
|
- adc rdi, rdx
|
|
|
- adc rsi, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r10+24]
|
|
|
- add r15, rax
|
|
|
- adc rdi, rdx
|
|
|
- adc rsi, 0
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r10+16]
|
|
|
- xor rbx, rbx
|
|
|
- add rdi, rax
|
|
|
- adc rsi, rdx
|
|
|
- adc rbx, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r10+24]
|
|
|
- add rdi, rax
|
|
|
- adc rsi, rdx
|
|
|
- adc rbx, 0
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r10+24]
|
|
|
- add rsi, rax
|
|
|
- adc rbx, rdx
|
|
|
- ; Start Reduction
|
|
|
- ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- ; a[0]-a[3] + (a[0] * 2) << 192
|
|
|
- mov rax, r11 ; mu[0] = a[0]
|
|
|
- lea rdx, QWORD PTR [r14+2*r11] ; mu[3] starts as a[3] + 2*a[0]
|
|
|
- mov r10, r12 ; mu[1] starts as a[1]
|
|
|
- mov r8, r13 ; mu[2] starts as a[2]
|
|
|
- mov r9, r13
|
|
|
- ; a[0]-a[2] << 32
|
|
|
- shl r11, 32
|
|
|
- shld r9, r10, 32
|
|
|
- shld r12, rax, 32
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- sub rdx, r11
|
|
|
- ; + a[0]-a[2] << 32 << 64
|
|
|
- add r10, r11
|
|
|
- adc r8, r12
|
|
|
- adc rdx, r9 ; mu = rdx:r8:r10:rax (high to low)
|
|
|
- ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
|
|
|
- xor r9, r9
|
|
|
- ; a += mu << 256
|
|
|
- add r15, rax
|
|
|
- adc rdi, r10
|
|
|
- adc rsi, r8
|
|
|
- adc rbx, rdx
|
|
|
- sbb r11, r11 ; r11 = running overflow word (0 or -1 here)
|
|
|
- ; a += mu << 192
|
|
|
- add r14, rax
|
|
|
- adc r15, r10
|
|
|
- mov r12, r10 ; save mu[1]; mov keeps CF
|
|
|
- adc rdi, r8
|
|
|
- adc rsi, rdx
|
|
|
- adc rbx, 0
|
|
|
- sbb r11, 0
|
|
|
- ; mu <<= 32
|
|
|
- shld r9, rdx, 32
|
|
|
- shld rdx, r8, 32
|
|
|
- shld r8, r10, 32
|
|
|
- shld r10, rax, 32
|
|
|
- shl rax, 32
|
|
|
- ; a -= (mu << 32) << 192
|
|
|
- sub r14, rax
|
|
|
- sbb r15, r10
|
|
|
- sbb rdi, r8
|
|
|
- sbb rsi, rdx
|
|
|
- sbb rbx, r9
|
|
|
- adc r11, 0
|
|
|
- ; a += (mu << 32) << 64
|
|
|
- sub r12, rax ; only CF matters: saved mu[1] is (a[1] + (a[0] << 32)) mod 2^64, so this borrow equals that addition's carry
|
|
|
- adc r13, r10
|
|
|
- adc r14, r8
|
|
|
- adc r15, rdx
|
|
|
- adc rdi, r9
|
|
|
- adc rsi, 0
|
|
|
- adc rbx, 0
|
|
|
- sbb r11, 0 ; r11 = final mask: -1 if the sum overflowed 256 bits
|
|
|
- mov r10, 18446744069414584321 ; 0xFFFFFFFF00000001 = top modulus word
|
|
|
- ; mask m and sub from result if overflow
|
|
|
- ; m[0] = -1 & mask = mask
|
|
|
- ; m[2] = 0 & mask = 0
|
|
|
- mov eax, r11d ; rax = mask & 0xFFFFFFFF = masked m[1] (32-bit write zero-extends)
|
|
|
- and r10, r11
|
|
|
- sub r15, r11
|
|
|
- sbb rdi, rax
|
|
|
- mov QWORD PTR [rcx], r15
|
|
|
- sbb rsi, 0
|
|
|
- mov QWORD PTR [rcx+8], rdi
|
|
|
- sbb rbx, r10
|
|
|
- mov QWORD PTR [rcx+16], rsi
|
|
|
- mov QWORD PTR [rcx+24], rbx
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_mul_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
|
|
|
-; *
|
|
|
-; * The six cross products are summed once and doubled, the four squares are
|
|
|
-; * added, giving the 8-word square in r10,r11,r12,r13,r14,r15,rdi,rsi (low to
|
|
|
-; * high). The reduction that follows is hard-wired to the modulus words
|
|
|
-; * -1, 2^32-1, 0, 0xFFFFFFFF00000001.
|
|
|
-; *
|
|
|
-; * r Result of squaring. Four 64-bit words are written (rcx).
|
|
|
-; * a Number to square in Montgomery form. (rdx on entry, moved to r8)
|
|
|
-; * m Modulus (prime). Not read: r8 is overwritten on entry.
|
|
|
-; * mp Montgomery multiplier. Not read: r9 is used as scratch.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_sqr_4 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov r8, rdx ; keep a in r8: mul overwrites rdx
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- mov r11, rax
|
|
|
- mov r12, rdx
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r13, r13
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor r14, r14
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor rdi, rdi
|
|
|
- add r15, rax
|
|
|
- adc rdi, rdx
|
|
|
- ; Double
|
|
|
- xor rsi, rsi
|
|
|
- add r11, r11
|
|
|
- adc r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- adc rdi, rdi
|
|
|
- adc rsi, 0
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax
|
|
|
- mov r10, rax ; result word 0
|
|
|
- mov rbx, rdx ; high half is added into word 1 below
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r11, rbx
|
|
|
- adc r12, rax
|
|
|
- adc rdx, 0
|
|
|
- mov rbx, rdx ; high half plus carry, for word 3
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r13, rbx
|
|
|
- adc r14, rax
|
|
|
- adc rdx, 0
|
|
|
- mov rbx, rdx ; high half plus carry, for word 5
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax
|
|
|
- add r15, rbx
|
|
|
- adc rdi, rax
|
|
|
- adc rsi, rdx
|
|
|
- ; Start Reduction
|
|
|
- ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- ; a[0]-a[3] + (a[0] * 2) << 192
|
|
|
- mov rax, r10 ; mu[0] = a[0]
|
|
|
- lea rdx, QWORD PTR [r13+2*r10] ; mu[3] starts as a[3] + 2*a[0]
|
|
|
- mov r8, r11 ; mu[1] starts as a[1]
|
|
|
- mov rbx, r12 ; mu[2] starts as a[2]
|
|
|
- mov r9, r12
|
|
|
- ; a[0]-a[2] << 32
|
|
|
- shl r10, 32
|
|
|
- shld r9, r8, 32
|
|
|
- shld r11, rax, 32
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- sub rdx, r10
|
|
|
- ; + a[0]-a[2] << 32 << 64
|
|
|
- add r8, r10
|
|
|
- adc rbx, r11
|
|
|
- adc rdx, r9 ; mu = rdx:rbx:r8:rax (high to low)
|
|
|
- ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
|
|
|
- xor r9, r9
|
|
|
- ; a += mu << 256
|
|
|
- add r14, rax
|
|
|
- adc r15, r8
|
|
|
- adc rdi, rbx
|
|
|
- adc rsi, rdx
|
|
|
- sbb r10, r10 ; r10 = running overflow word (0 or -1 here)
|
|
|
- ; a += mu << 192
|
|
|
- add r13, rax
|
|
|
- adc r14, r8
|
|
|
- mov r11, r8 ; save mu[1]; mov keeps CF
|
|
|
- adc r15, rbx
|
|
|
- adc rdi, rdx
|
|
|
- adc rsi, 0
|
|
|
- sbb r10, 0
|
|
|
- ; mu <<= 32
|
|
|
- shld r9, rdx, 32
|
|
|
- shld rdx, rbx, 32
|
|
|
- shld rbx, r8, 32
|
|
|
- shld r8, rax, 32
|
|
|
- shl rax, 32
|
|
|
- ; a -= (mu << 32) << 192
|
|
|
- sub r13, rax
|
|
|
- sbb r14, r8
|
|
|
- sbb r15, rbx
|
|
|
- sbb rdi, rdx
|
|
|
- sbb rsi, r9
|
|
|
- adc r10, 0
|
|
|
- ; a += (mu << 32) << 64
|
|
|
- ; Only CF of the next sub matters: saved mu[1] is (a[1] + (a[0] << 32))
|
|
|
- ; mod 2^64, so this borrow equals the carry of that addition.
|
|
|
- sub r11, rax
|
|
|
- adc r12, r8
|
|
|
- adc r13, rbx
|
|
|
- adc r14, rdx
|
|
|
- adc r15, r9
|
|
|
- adc rdi, 0
|
|
|
- adc rsi, 0
|
|
|
- sbb r10, 0 ; r10 = final mask: -1 if the sum overflowed 256 bits
|
|
|
- mov r8, 18446744069414584321 ; 0xFFFFFFFF00000001 = top modulus word
|
|
|
- ; mask m and sub from result if overflow
|
|
|
- ; m[0] = -1 & mask = mask
|
|
|
- ; m[2] = 0 & mask = 0
|
|
|
- mov eax, r10d ; rax = mask & 0xFFFFFFFF = masked m[1]
|
|
|
- and r8, r10
|
|
|
- sub r14, r10
|
|
|
- sbb r15, rax
|
|
|
- mov QWORD PTR [rcx], r14
|
|
|
- sbb rdi, 0
|
|
|
- mov QWORD PTR [rcx+8], r15
|
|
|
- sbb rsi, r8
|
|
|
- mov QWORD PTR [rcx+16], rdi
|
|
|
- mov QWORD PTR [rcx+24], rsi
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_sqr_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Compare a with b in constant time.
|
|
|
-; * Words are compared from most to least significant with cmov only (no branches); once a difference is seen, later words are masked to zero.
|
|
|
-; * a A single precision integer. Four 64-bit words (rcx).
|
|
|
-; * b A single precision integer. Four 64-bit words (rdx).
|
|
|
-; * return -ve, 0 or +ve if a is less than, equal to or greater than b
|
|
|
-; * respectively. The values produced in rax are -1, 0 and 1.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_cmp_4 PROC
|
|
|
- push r12
|
|
|
- xor r9, r9 ; r9 = 0, source for clearing the mask
|
|
|
- mov r8, -1 ; r8 = "all words equal so far" mask
|
|
|
- mov rax, -1 ; provisional result
|
|
|
- mov r10, 1 ; constant for "a is greater"
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10 ; a word above b word
|
|
|
- cmovc rax, r8 ; a word below b word (r8 is -1 whenever this can fire)
|
|
|
- cmovnz r8, r9 ; difference found: mask out all lower words
|
|
|
- mov r11, QWORD PTR [rcx+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+8]
|
|
|
- mov r12, QWORD PTR [rdx+8]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx]
|
|
|
- mov r12, QWORD PTR [rdx]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- xor rax, r8 ; all equal: r8 = -1 turns the initial -1 into 0; otherwise r8 = 0 leaves rax unchanged
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_cmp_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting.
|
|
|
-; *
|
|
|
-; * Computes r = a - (b & m) without branching. All loads of b and a finish
|
|
|
-; * before the first store to r. Only volatile registers are used: the
|
|
|
-; * pointer r8 and the mask r9 are recycled as data registers once they are
|
|
|
-; * no longer needed, so nothing is pushed.
|
|
|
-; *
|
|
|
-; * r A single precision number representing condition subtract result. (rcx)
|
|
|
-; * a A single precision number to subtract from. (rdx)
|
|
|
-; * b A single precision number to subtract. (r8)
|
|
|
-; * m Mask value to apply. (r9)
|
|
|
-; * return rax = 0 when there was no borrow, -1 (all ones) otherwise.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_cond_sub_4 PROC
|
|
|
- ; rax, r10, r11, r8 = b[0..3] & m
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- mov r8, QWORD PTR [r8+24] ; last use of the pointer: r8 = b[3]
|
|
|
- and rax, r9
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- and r8, r9 ; mask is dead after this, r9 becomes data
|
|
|
- ; Borrow chain. Each subtrahend register is freed by its sub/sbb and
|
|
|
- ; reused for the next word of a; mov does not disturb CF.
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- sub r9, rax ; r9 = r[0]
|
|
|
- mov rax, QWORD PTR [rdx+8]
|
|
|
- sbb rax, r10 ; rax = r[1]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- sbb r10, r11 ; r10 = r[2]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- sbb r11, r8 ; r11 = r[3]
|
|
|
- mov QWORD PTR [rcx], r9
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- sbb rax, rax ; rax = -borrow (r[1] has already been stored)
|
|
|
- ret
|
|
|
-sp_256_cond_sub_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Reduce the number back to 256 bits using Montgomery reduction.
|
|
|
-; * Shift/add reduction hard-wired to the modulus words -1, 2^32-1, 0, 0xFFFFFFFF00000001; the eight input words are loaded into r9..r15, rdi.
|
|
|
-; * a A single precision number to reduce in place. Eight words are read, words 0..3 are rewritten (rcx on entry, moved to r8).
|
|
|
-; * m The single precision number representing the modulus. Not read: rdx is overwritten before any use.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n. Not read: r8 is overwritten on entry.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_reduce_4 PROC
|
|
|
- push rbx
|
|
|
- push rsi
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- mov r8, rcx ; r8 = a; rcx is reused as a mu word below
|
|
|
- mov r9, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [r8+24]
|
|
|
- mov r13, QWORD PTR [r8+32]
|
|
|
- mov r14, QWORD PTR [r8+40]
|
|
|
- mov r15, QWORD PTR [r8+48]
|
|
|
- mov rdi, QWORD PTR [r8+56]
|
|
|
- ; Start Reduction
|
|
|
- ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- ; a[0]-a[3] + (a[0] * 2) << 192
|
|
|
- mov rax, r9 ; mu[0] = a[0]
|
|
|
- lea rdx, QWORD PTR [r12+2*r9] ; mu[3] starts as a[3] + 2*a[0]
|
|
|
- mov rbx, r10 ; mu[1] starts as a[1]
|
|
|
- mov rcx, r11 ; mu[2] starts as a[2]
|
|
|
- mov rsi, r11
|
|
|
- ; a[0]-a[2] << 32
|
|
|
- shl r9, 32
|
|
|
- shld rsi, rbx, 32
|
|
|
- shld r10, rax, 32
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- sub rdx, r9
|
|
|
- ; + a[0]-a[2] << 32 << 64
|
|
|
- add rbx, r9
|
|
|
- adc rcx, r10
|
|
|
- adc rdx, rsi ; mu = rdx:rcx:rbx:rax (high to low)
|
|
|
- ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
|
|
|
- xor rsi, rsi
|
|
|
- ; a += mu << 256
|
|
|
- add r13, rax
|
|
|
- adc r14, rbx
|
|
|
- adc r15, rcx
|
|
|
- adc rdi, rdx
|
|
|
- sbb r9, r9 ; r9 = running overflow word (0 or -1 here)
|
|
|
- ; a += mu << 192
|
|
|
- add r12, rax
|
|
|
- adc r13, rbx
|
|
|
- mov r10, rbx ; save mu[1]; mov keeps CF
|
|
|
- adc r14, rcx
|
|
|
- adc r15, rdx
|
|
|
- adc rdi, 0
|
|
|
- sbb r9, 0
|
|
|
- ; mu <<= 32
|
|
|
- shld rsi, rdx, 32
|
|
|
- shld rdx, rcx, 32
|
|
|
- shld rcx, rbx, 32
|
|
|
- shld rbx, rax, 32
|
|
|
- shl rax, 32
|
|
|
- ; a -= (mu << 32) << 192
|
|
|
- sub r12, rax
|
|
|
- sbb r13, rbx
|
|
|
- sbb r14, rcx
|
|
|
- sbb r15, rdx
|
|
|
- sbb rdi, rsi
|
|
|
- adc r9, 0
|
|
|
- ; a += (mu << 32) << 64
|
|
|
- sub r10, rax ; only CF matters: saved mu[1] is (a[1] + (a[0] << 32)) mod 2^64, so this borrow equals that addition's carry
|
|
|
- adc r11, rbx
|
|
|
- adc r12, rcx
|
|
|
- adc r13, rdx
|
|
|
- adc r14, rsi
|
|
|
- adc r15, 0
|
|
|
- adc rdi, 0
|
|
|
- sbb r9, 0 ; r9 = final mask: -1 if the sum overflowed 256 bits
|
|
|
- mov rbx, 18446744069414584321 ; 0xFFFFFFFF00000001 = top modulus word
|
|
|
- ; mask m and sub from result if overflow
|
|
|
- ; m[0] = -1 & mask = mask
|
|
|
- ; m[2] = 0 & mask = 0
|
|
|
- mov eax, r9d ; rax = mask & 0xFFFFFFFF = masked m[1] (32-bit write zero-extends)
|
|
|
- and rbx, r9
|
|
|
- sub r13, r9
|
|
|
- sbb r14, rax
|
|
|
- mov QWORD PTR [r8], r13
|
|
|
- sbb r15, 0
|
|
|
- mov QWORD PTR [r8+8], r14
|
|
|
- sbb rdi, rbx
|
|
|
- mov QWORD PTR [r8+16], r15
|
|
|
- mov QWORD PTR [r8+24], rdi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rsi
|
|
|
- pop rbx
|
|
|
- ret
|
|
|
-sp_256_mont_reduce_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Reduce the number back to 256 bits using Montgomery reduction.
|
|
|
-; * Generic word-by-word variant: four rounds of a[i..i+4] += m * (a[i] * mp), then one masked subtraction of m. Unlike sp_256_mont_reduce_4 it reads m and mp.
|
|
|
-; * a A single precision number to reduce in place. Eight words are read and modified; the result is left in words 0..3 (rcx).
|
|
|
-; * m The single precision number representing the modulus. Four words (rdx on entry, moved to r9).
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n. (r8)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_reduce_order_4 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov r9, rdx ; r9 = m; mul overwrites rdx
|
|
|
- ; i = 0
|
|
|
- xor rdi, rdi ; rdi = overflow bit carried between rounds
|
|
|
- mov r10, 4 ; round counter
|
|
|
- mov r15, rcx ; r15 = &a[i]
|
|
|
-L_mont_loop_4:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov r14, QWORD PTR [r15]
|
|
|
- imul r14, r8
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mov rax, QWORD PTR [r9]
|
|
|
- mov r12, QWORD PTR [r9+8] ; preload m[1]
|
|
|
- mul r14
|
|
|
- mov rsi, QWORD PTR [r15]
|
|
|
- add rsi, rax
|
|
|
- mov r11, rdx
|
|
|
- mov QWORD PTR [r15], rsi
|
|
|
- adc r11, 0 ; r11 = carry word into a[i+1]
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mov rax, r12
|
|
|
- mul r14
|
|
|
- mov r12, QWORD PTR [r9+16] ; preload m[2]
|
|
|
- mov rsi, QWORD PTR [r15+8]
|
|
|
- add rax, r11
|
|
|
- mov r13, rdx
|
|
|
- adc r13, 0
|
|
|
- add rsi, rax
|
|
|
- mov QWORD PTR [r15+8], rsi
|
|
|
- adc r13, 0 ; r13 = carry word into a[i+2]
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mov rax, r12
|
|
|
- mul r14
|
|
|
- mov r12, QWORD PTR [r9+24] ; preload m[3]
|
|
|
- mov rsi, QWORD PTR [r15+16]
|
|
|
- add rax, r13
|
|
|
- mov r11, rdx
|
|
|
- adc r11, 0
|
|
|
- add rsi, rax
|
|
|
- mov QWORD PTR [r15+16], rsi
|
|
|
- adc r11, 0 ; r11 = carry word into a[i+3]
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mov rax, r12
|
|
|
- mul r14
|
|
|
- mov rsi, QWORD PTR [r15+24]
|
|
|
- add rax, r11
|
|
|
- adc rdx, rdi ; fold in the previous round's overflow bit
|
|
|
- mov rdi, 0 ; mov (not xor) so CF from the adc above survives
|
|
|
- adc rdi, 0
|
|
|
- add rsi, rax
|
|
|
- mov QWORD PTR [r15+24], rsi
|
|
|
- adc QWORD PTR [r15+32], rdx
|
|
|
- adc rdi, 0 ; rdi = overflow bit for the next round
|
|
|
- ; i += 1
|
|
|
- add r15, 8
|
|
|
- dec r10
|
|
|
- jnz L_mont_loop_4
|
|
|
- xor rax, rax
|
|
|
- mov rdx, QWORD PTR [rcx+32] ; reduced value is in a[4..7]
|
|
|
- mov r10, QWORD PTR [rcx+40]
|
|
|
- mov rsi, QWORD PTR [rcx+48]
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- sub rax, rdi ; rax = 0 - overflow: all-ones mask if the final round overflowed
|
|
|
- mov r12, QWORD PTR [r9]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- mov r14, QWORD PTR [r9+16]
|
|
|
- mov r15, QWORD PTR [r9+24]
|
|
|
- and r12, rax ; m & mask
|
|
|
- and r13, rax
|
|
|
- and r14, rax
|
|
|
- and r15, rax
|
|
|
- sub rdx, r12
|
|
|
- sbb r10, r13
|
|
|
- sbb rsi, r14
|
|
|
- sbb r11, r15
|
|
|
- mov QWORD PTR [rcx], rdx ; result goes to a[0..3]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], rsi
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_reduce_order_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Add two Montgomery form numbers (r = a + b % m).
|
|
|
-; * Reduction is triggered only by a carry out of bit 256; the modulus words -1, 2^32-1, 0, 0xFFFFFFFF00000001 are built from the mask rather than read from m.
|
|
|
-; * r Result of addition. Four 64-bit words are written (rcx).
|
|
|
-; * a First number to add in Montgomery form. (rdx)
|
|
|
-; * b Second number to add in Montgomery form. (r8)
|
|
|
-; * m Modulus (prime). Not read: r9 is overwritten by the load of a[1].
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_add_4 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- add rax, QWORD PTR [r8]
|
|
|
- adc r9, QWORD PTR [r8+8]
|
|
|
- mov r13, 18446744069414584321 ; 0xFFFFFFFF00000001 = top modulus word; mov keeps CF
|
|
|
- adc r10, QWORD PTR [r8+16]
|
|
|
- adc r11, QWORD PTR [r8+24]
|
|
|
- sbb rdx, rdx ; rdx = mask: -1 if the addition carried out, else 0
|
|
|
- mov r12d, edx ; r12 = mask & 0xFFFFFFFF = masked m[1] (32-bit write zero-extends)
|
|
|
- and r13, rdx ; masked m[3]
|
|
|
- sub rax, rdx ; masked m[0] is the mask itself
|
|
|
- sbb r9, r12
|
|
|
- sbb r10, 0 ; m[2] = 0
|
|
|
- sbb r11, r13
|
|
|
- adc rdx, 0 ; mask + borrow: becomes 0 when the subtraction borrowed, stays -1 when it did not
|
|
|
- and r12, rdx
|
|
|
- and r13, rdx
|
|
|
- sub rax, rdx ; second masked subtraction, active only if the mask survived
|
|
|
- sbb r9, r12
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- sbb r10, 0
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- sbb r11, r13
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_add_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Double a Montgomery form number (r = a + a % m).
|
|
|
-; *
|
|
|
-; * r Result of doubling.
|
|
|
-; * a Number to double in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_dbl_4 PROC ; rcx = r, rdx = a; r8 is overwritten before it is read, so the m argument (presumably in r8) is unused
|
|
|
- push r12 ; r12/r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx] ; load the four 64-bit words of a into rax, r8, r9, r10
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- add rax, rax ; 256-bit a + a, carry chained through the adc instructions
|
|
|
- adc r8, r8
|
|
|
- mov r12, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word (mov leaves CF intact)
|
|
|
- adc r9, r9
|
|
|
- mov r13, r10 ; copy the top word before it is doubled
|
|
|
- adc r10, r10
|
|
|
- sar r13, 63 ; r13 = all-ones if bit 255 of a was set (the doubling carried out), else 0
|
|
|
- mov r11d, r13d ; r11 = low 32 bits of the mask, zero-extended (0x00000000FFFFFFFF or 0)
|
|
|
- and r12, r13 ; r12 = top modulus word & mask
|
|
|
- sub rax, r13 ; subtract the masked modulus words {mask, r11, 0, r12}
|
|
|
- sbb r8, r11
|
|
|
- sbb r9, 0
|
|
|
- sbb r10, r12
|
|
|
- adc r13, 0 ; mask + borrow: stays all-ones only if the subtraction did not borrow
|
|
|
- and r11, r13 ; re-mask the modulus words with the updated mask
|
|
|
- and r12, r13
|
|
|
- sub rax, r13 ; second masked subtraction, interleaved with the stores to r
|
|
|
- sbb r8, r11
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- sbb r9, 0
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_dbl_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Triple a Montgomery form number (r = a + a + a % m).
|
|
|
-; *
|
|
|
-; * r Result of Tripling.
|
|
|
-; * a Number to triple in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_tpl_4 PROC ; rcx = r, rdx = a; r8 is overwritten before it is read, so the m argument (presumably in r8) is unused
|
|
|
- push r12 ; r12/r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx] ; load the four 64-bit words of a into rax, r8, r9, r10
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- add rax, rax ; stage 1: 256-bit a + a
|
|
|
- adc r8, r8
|
|
|
- mov r12, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word (mov leaves CF intact)
|
|
|
- adc r9, r9
|
|
|
- adc r10, r10
|
|
|
- sbb r13, r13 ; r13 = all-ones if the doubling carried out, else 0
|
|
|
- mov r11d, r13d ; r11 = low 32 bits of the mask, zero-extended
|
|
|
- and r12, r13 ; r12 = top modulus word & mask
|
|
|
- sub rax, r13 ; subtract the masked modulus words {mask, r11, 0, r12}
|
|
|
- sbb r8, r11
|
|
|
- sbb r9, 0
|
|
|
- sbb r10, r12
|
|
|
- adc r13, 0 ; mask + borrow: stays all-ones only if the subtraction did not borrow
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- sub rax, r13 ; second masked subtraction; r13 is left as 0 or all-ones afterwards
|
|
|
- sbb r8, r11
|
|
|
- sbb r9, 0
|
|
|
- sbb r10, r12
|
|
|
- add rax, QWORD PTR [rdx] ; stage 2: add a once more to the doubled value
|
|
|
- adc r8, QWORD PTR [rdx+8]
|
|
|
- mov r12, 18446744069414584321 ; reload the top modulus word (the copy above was masked)
|
|
|
- adc r9, QWORD PTR [rdx+16]
|
|
|
- adc r10, QWORD PTR [rdx+24]
|
|
|
- sbb r13, r13 ; mask from this carry only; was "sbb r13, 0", which gave -1/-2 when stage 1 left r13 = -1
|
|
|
- mov r11d, r13d ; r11 = low 32 bits of the mask, zero-extended
|
|
|
- and r12, r13
|
|
|
- sub rax, r13 ; subtract the masked modulus words {mask, r11, 0, r12}
|
|
|
- sbb r8, r11
|
|
|
- sbb r9, 0
|
|
|
- sbb r10, r12
|
|
|
- adc r13, 0 ; mask + borrow: stays all-ones only if the subtraction did not borrow
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- sub rax, r13 ; second masked subtraction, interleaved with the stores to r
|
|
|
- sbb r8, r11
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- sbb r9, 0
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_tpl_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Subtract two Montgomery form numbers (r = a - b % m).
|
|
|
-; *
|
|
|
-; * r Result of subtraction.
|
|
|
-; * a Number to subtract from in Montgomery form.
|
|
|
-; * b Number to subtract with in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_sub_4 PROC ; rcx = r, rdx = a, r8 = b; r9 is overwritten before it is read, so the m argument (presumably in r9) is unused
|
|
|
- push r12 ; r12/r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx] ; load the four 64-bit words of a into rax, r9, r10, r11
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- sub rax, QWORD PTR [r8] ; 256-bit a - b, borrow chained through the sbb instructions
|
|
|
- sbb r9, QWORD PTR [r8+8]
|
|
|
- mov r13, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word (mov leaves CF intact)
|
|
|
- sbb r10, QWORD PTR [r8+16]
|
|
|
- sbb r11, QWORD PTR [r8+24]
|
|
|
- sbb rdx, rdx ; rdx = all-ones if the subtraction borrowed, else 0
|
|
|
- mov r12d, edx ; r12 = low 32 bits of the mask, zero-extended (0x00000000FFFFFFFF or 0)
|
|
|
- and r13, rdx ; r13 = top modulus word & mask
|
|
|
- add rax, rdx ; add back the masked modulus words {mask, r12, 0, r13}
|
|
|
- adc r9, r12
|
|
|
- adc r10, 0
|
|
|
- adc r11, r13
|
|
|
- adc rdx, 0 ; mask + carry: stays all-ones only if the add-back did not carry out
|
|
|
- and r12, rdx ; re-mask the modulus words with the updated mask
|
|
|
- and r13, rdx
|
|
|
- add rax, rdx ; second masked add-back, interleaved with the stores to r
|
|
|
- adc r9, r12
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- adc r11, r13
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_sub_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
|
|
|
-; *
|
|
|
-; * r Result of division by 2.
|
|
|
-; * a Number to divide.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_div2_4 PROC ; rcx = r, rdx = a; r8 is overwritten before it is read, so the m argument (presumably in r8) is unused
|
|
|
- push r12 ; r12/r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx] ; load the four 64-bit words of a into rax, r8, r9, r10
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov r12, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word
|
|
|
- mov r13, rax
|
|
|
- and r13, 1 ; isolate the low bit of a
|
|
|
- neg r13 ; r13 = all-ones if a is odd, else 0
|
|
|
- mov r11d, r13d ; r11 = low 32 bits of the mask, zero-extended (0x00000000FFFFFFFF or 0)
|
|
|
- and r12, r13 ; r12 = top modulus word & mask
|
|
|
- add rax, r13 ; add the masked modulus words {mask, r11, 0, r12} so the sum is even
|
|
|
- adc r8, r11
|
|
|
- adc r9, 0
|
|
|
- adc r10, r12
|
|
|
- mov r13, 0 ; mov (not xor) so CF from the adc chain survives
|
|
|
- adc r13, 0 ; r13 = carry out of the addition (bit 256 of the sum)
|
|
|
- shrd rax, r8, 1 ; shift the 257-bit value right by one, word by word
|
|
|
- shrd r8, r9, 1
|
|
|
- shrd r9, r10, 1
|
|
|
- shrd r10, r13, 1
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_div2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Two Montgomery numbers, subtract double second from first (r = a - 2.b % m).
|
|
|
-; *
|
|
|
-; * r Result of subtraction.
|
|
|
-; * a Number to subtract from in Montgomery form.
|
|
|
-; * b Number to double and subtract with in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_rsb_sub_dbl_4 PROC ; rcx = r, rdx = a, r8 = b; writes r = a - 2*b and then overwrites b with b - r
|
|
|
- push r12 ; r12-r15, rdi, rsi are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov rax, QWORD PTR [rdx] ; a -> rax, r9, r10, r11 (r9 is overwritten unread, so m is unused)
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r12, QWORD PTR [r8] ; b -> r12, r13, r14, r15
|
|
|
- mov r13, QWORD PTR [r8+8]
|
|
|
- mov r14, QWORD PTR [r8+16]
|
|
|
- mov r15, QWORD PTR [r8+24]
|
|
|
- add r12, r12 ; stage 1: 256-bit b + b
|
|
|
- adc r13, r13
|
|
|
- mov rsi, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word (mov leaves CF intact)
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- sbb rdx, rdx ; rdx = all-ones if the doubling carried out, else 0 (pointer a is no longer needed)
|
|
|
- mov edi, edx ; rdi = low 32 bits of the mask, zero-extended
|
|
|
- and rsi, rdx ; rsi = top modulus word & mask
|
|
|
- sub r12, rdx ; subtract the masked modulus words {mask, rdi, 0, rsi}
|
|
|
- sbb r13, rdi
|
|
|
- sbb r14, 0
|
|
|
- sbb r15, rsi
|
|
|
- adc rdx, 0 ; mask + borrow: stays all-ones only if the subtraction did not borrow
|
|
|
- and rdi, rdx
|
|
|
- and rsi, rdx
|
|
|
- sub r12, rdx ; second masked subtraction; rdx is left as 0 or all-ones afterwards
|
|
|
- sbb r13, rdi
|
|
|
- sbb r14, 0
|
|
|
- sbb r15, rsi
|
|
|
- sub rax, r12 ; stage 2: a - (doubled b)
|
|
|
- sbb r9, r13
|
|
|
- mov rsi, 18446744069414584321 ; reload the top modulus word (the copy above was masked)
|
|
|
- sbb r10, r14
|
|
|
- sbb r11, r15
|
|
|
- sbb rdx, rdx ; mask from this borrow only; was "sbb rdx, 0", which gave -1/-2 when stage 1 left rdx = -1
|
|
|
- mov edi, edx ; rdi = low 32 bits of the mask, zero-extended
|
|
|
- and rsi, rdx
|
|
|
- add rax, rdx ; add back the masked modulus words {mask, rdi, 0, rsi}
|
|
|
- adc r9, rdi
|
|
|
- adc r10, 0
|
|
|
- adc r11, rsi
|
|
|
- adc rdx, 0 ; mask + carry: stays all-ones only if the add-back did not carry out
|
|
|
- and rdi, rdx
|
|
|
- and rsi, rdx
|
|
|
- add rax, rdx ; second masked add-back, interleaved with the stores to r
|
|
|
- adc r9, rdi
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- adc r11, rsi
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov r12, QWORD PTR [r8] ; stage 3: reload the original b
|
|
|
- mov r13, QWORD PTR [r8+8]
|
|
|
- mov r14, QWORD PTR [r8+16]
|
|
|
- mov r15, QWORD PTR [r8+24]
|
|
|
- sub r12, rax ; b - r, using the result words still held in rax, r9, r10, r11
|
|
|
- sbb r13, r9
|
|
|
- mov rsi, 18446744069414584321 ; reload the top modulus word
|
|
|
- sbb r14, r10
|
|
|
- sbb r15, r11
|
|
|
- sbb rdx, rdx ; rdx = all-ones if the subtraction borrowed, else 0
|
|
|
- mov edi, edx
|
|
|
- and rsi, rdx
|
|
|
- add r12, rdx ; add back the masked modulus words
|
|
|
- adc r13, rdi
|
|
|
- adc r14, 0
|
|
|
- adc r15, rsi
|
|
|
- adc rdx, 0 ; mask + carry: stays all-ones only if the add-back did not carry out
|
|
|
- and rdi, rdx
|
|
|
- and rsi, rdx
|
|
|
- add r12, rdx ; second masked add-back, interleaved with the stores back into b
|
|
|
- adc r13, rdi
|
|
|
- mov QWORD PTR [r8], r12
|
|
|
- adc r14, 0
|
|
|
- mov QWORD PTR [r8+8], r13
|
|
|
- adc r15, rsi
|
|
|
- mov QWORD PTR [r8+16], r14
|
|
|
- mov QWORD PTR [r8+24], r15
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_rsb_sub_dbl_4 ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-; /* Touch each possible point that could be being copied.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of point to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_get_point_33_4 PROC ; rcx = r, rdx = table, r8d = idx; reads every scanned entry regardless of idx
|
|
|
- sub rsp, 160 ; room to save xmm6-xmm15 (10 x 16 bytes), callee-saved in the Microsoft x64 convention
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- vmovdqu OWORD PTR [rsp+128], xmm14
|
|
|
- vmovdqu OWORD PTR [rsp+144], xmm15
|
|
|
- mov rax, 1
|
|
|
- movd xmm13, r8d ; xmm13 = idx (broadcast to all lanes below)
|
|
|
- add rdx, 200 ; skip entry 0; entries are 200 bytes apart
|
|
|
- movd xmm15, eax ; xmm15 = 1 (broadcast to all lanes below)
|
|
|
- mov rax, 32 ; loop count: entries 1..32 are scanned
|
|
|
- pshufd xmm15, xmm15, 0
|
|
|
- pshufd xmm13, xmm13, 0
|
|
|
- pxor xmm14, xmm14 ; redundant: xmm14 is overwritten by the movdqa just before the loop
|
|
|
- pxor xmm0, xmm0 ; xmm0-xmm5 accumulate the selected entry; they stay zero if no index matches
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- movdqa xmm14, xmm15 ; xmm14 = running entry number, starting at 1
|
|
|
-L_256_get_point_33_4_start_1:
|
|
|
- movdqa xmm12, xmm14
|
|
|
- paddd xmm14, xmm15 ; advance the entry number for the next iteration
|
|
|
- pcmpeqd xmm12, xmm13 ; xmm12 = all-ones when this entry number equals idx, else 0
|
|
|
- movdqu xmm6, [rdx] ; 32 bytes at entry offsets 0, 64 and 128
|
|
|
- movdqu xmm7, [rdx+16]
|
|
|
- movdqu xmm8, [rdx+64]
|
|
|
- movdqu xmm9, [rdx+80]
|
|
|
- movdqu xmm10, [rdx+128]
|
|
|
- movdqu xmm11, [rdx+144]
|
|
|
- add rdx, 200
|
|
|
- pand xmm6, xmm12 ; keep the data only for the matching entry
|
|
|
- pand xmm7, xmm12
|
|
|
- pand xmm8, xmm12
|
|
|
- pand xmm9, xmm12
|
|
|
- pand xmm10, xmm12
|
|
|
- pand xmm11, xmm12
|
|
|
- por xmm0, xmm6
|
|
|
- por xmm1, xmm7
|
|
|
- por xmm2, xmm8
|
|
|
- por xmm3, xmm9
|
|
|
- por xmm4, xmm10
|
|
|
- por xmm5, xmm11
|
|
|
- dec rax
|
|
|
- jnz L_256_get_point_33_4_start_1
|
|
|
- movdqu [rcx], xmm0 ; write 32 bytes at offsets 0, 64 and 128 of r
|
|
|
- movdqu [rcx+16], xmm1
|
|
|
- movdqu [rcx+64], xmm2
|
|
|
- movdqu [rcx+80], xmm3
|
|
|
- movdqu [rcx+128], xmm4
|
|
|
- movdqu [rcx+144], xmm5
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- vmovdqu xmm14, OWORD PTR [rsp+128]
|
|
|
- vmovdqu xmm15, OWORD PTR [rsp+144]
|
|
|
- add rsp, 160
|
|
|
- ret
|
|
|
-sp_256_get_point_33_4 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Touch each possible point that could be being copied.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of point to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_get_point_33_avx2_4 PROC ; rcx = r, rdx = table, r8d = idx; reads every scanned entry regardless of idx
|
|
|
- sub rsp, 64 ; room to save xmm6-xmm9, callee-saved in the Microsoft x64 convention
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- mov rax, 1
|
|
|
- movd xmm7, r8d ; xmm7 = idx
|
|
|
- add rdx, 200 ; skip entry 0; entries are 200 bytes apart
|
|
|
- movd xmm9, eax ; xmm9 = 1
|
|
|
- mov rax, 32 ; loop count: entries 1..32 are scanned
|
|
|
- vpxor ymm8, ymm8, ymm8 ; all-zero permute indices
|
|
|
- vpermd ymm7, ymm8, ymm7 ; broadcast element 0 (idx) to all eight lanes
|
|
|
- vpermd ymm9, ymm8, ymm9 ; broadcast element 0 (1) to all eight lanes
|
|
|
- vpxor ymm0, ymm0, ymm0 ; ymm0-ymm2 accumulate the selected entry; they stay zero if no index matches
|
|
|
- vpxor ymm1, ymm1, ymm1
|
|
|
- vpxor ymm2, ymm2, ymm2
|
|
|
- vmovdqa ymm8, ymm9 ; ymm8 = running entry number, starting at 1
|
|
|
-L_256_get_point_33_avx2_4_start:
|
|
|
- vpcmpeqd ymm6, ymm8, ymm7 ; ymm6 = all-ones when this entry number equals idx, else 0
|
|
|
- vpaddd ymm8, ymm8, ymm9 ; advance the entry number for the next iteration
|
|
|
- vmovupd ymm3, YMMWORD PTR [rdx] ; 32 bytes at entry offsets 0, 64 and 128
|
|
|
- vmovupd ymm4, YMMWORD PTR [rdx+64]
|
|
|
- vmovupd ymm5, YMMWORD PTR [rdx+128]
|
|
|
- add rdx, 200
|
|
|
- vpand ymm3, ymm3, ymm6 ; keep the data only for the matching entry
|
|
|
- vpand ymm4, ymm4, ymm6
|
|
|
- vpand ymm5, ymm5, ymm6
|
|
|
- vpor ymm0, ymm0, ymm3
|
|
|
- vpor ymm1, ymm1, ymm4
|
|
|
- vpor ymm2, ymm2, ymm5
|
|
|
- dec rax
|
|
|
- jnz L_256_get_point_33_avx2_4_start
|
|
|
- vmovupd YMMWORD PTR [rcx], ymm0 ; write 32 bytes at offsets 0, 64 and 128 of r
|
|
|
- vmovupd YMMWORD PTR [rcx+64], ymm1
|
|
|
- vmovupd YMMWORD PTR [rcx+128], ymm2
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- add rsp, 64
|
|
|
- vzeroupper ; clear the dirty upper YMM halves so the caller's legacy-SSE code pays no transition penalty
|
|
|
- ret
|
|
|
-sp_256_get_point_33_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply two Montgomery form numbers mod the modulus (prime).
|
|
|
-; * (r = a * b mod m)
|
|
|
-; *
|
|
|
-; * r Result of multiplication.
|
|
|
-; * a First number to multiply in Montgomery form.
|
|
|
-; * b Second number to multiply in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; * mp Montgomery multiplier.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_mul_avx2_4 PROC ; rcx = r, rdx = a, r8 = b; r9 is overwritten unread and no stack argument is read, so m/mp are unused
|
|
|
- push rbp ; rbp, r12-r15, rdi, rsi, rbx are used as scratch and restored before ret
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov rbp, r8 ; rbp = b (r8 is reused as a product word)
|
|
|
- mov rax, rdx ; rax = a (rdx is the implicit mulx multiplicand)
|
|
|
- mov rdx, QWORD PTR [rax]
|
|
|
- mov r14, QWORD PTR [rbp+8] ; B[1] cached in r14 for the first three rows
|
|
|
- ; A[0] * B[0]
|
|
|
- mulx r9, r8, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx ; rbx = 0; also clears CF and OF for the adcx/adox chains
|
|
|
- ; A[0] * B[1]
|
|
|
- mulx r10, rdi, r14
|
|
|
- adcx r9, rdi
|
|
|
- ; A[0] * B[2]
|
|
|
- mulx r11, rdi, QWORD PTR [rbp+16]
|
|
|
- adcx r10, rdi
|
|
|
- ; A[0] * B[3]
|
|
|
- mulx r12, rdi, QWORD PTR [rbp+24]
|
|
|
- adcx r11, rdi
|
|
|
- mov rdx, QWORD PTR [rax+8]
|
|
|
- adcx r12, rbx
|
|
|
- ; A[1] * B[0]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx ; reset CF and OF before the next row
|
|
|
- adcx r9, rdi
|
|
|
- ; A[1] * B[1]
|
|
|
- mulx r15, rdi, r14
|
|
|
- adox r10, rsi
|
|
|
- adcx r10, rdi
|
|
|
- ; A[1] * B[2]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp+16]
|
|
|
- adox r11, r15
|
|
|
- adcx r11, rdi
|
|
|
- ; A[1] * B[3]
|
|
|
- mulx r13, rdi, QWORD PTR [rbp+24]
|
|
|
- adox r12, rsi
|
|
|
- adcx r12, rdi
|
|
|
- adox r13, rbx
|
|
|
- mov rdx, QWORD PTR [rax+16]
|
|
|
- adcx r13, rbx
|
|
|
- ; A[2] * B[0]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx ; reset CF and OF before the next row
|
|
|
- adcx r10, rdi
|
|
|
- ; A[2] * B[1]
|
|
|
- mulx r15, rdi, r14
|
|
|
- adox r11, rsi
|
|
|
- adcx r11, rdi
|
|
|
- ; A[2] * B[2]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp+16]
|
|
|
- adox r12, r15
|
|
|
- adcx r12, rdi
|
|
|
- ; A[2] * B[3]
|
|
|
- mulx r14, rdi, QWORD PTR [rbp+24] ; r14 now becomes product word 6; B[1] is reloaded from memory below
|
|
|
- adox r13, rsi
|
|
|
- adcx r13, rdi
|
|
|
- adox r14, rbx
|
|
|
- mov rdx, QWORD PTR [rax+24]
|
|
|
- adcx r14, rbx
|
|
|
- ; A[3] * B[0]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp]
|
|
|
- xor rbx, rbx ; reset CF and OF before the last row
|
|
|
- adcx r11, rdi
|
|
|
- ; A[3] * B[1]
|
|
|
- mulx r15, rdi, QWORD PTR [rbp+8]
|
|
|
- adox r12, rsi
|
|
|
- adcx r12, rdi
|
|
|
- ; A[3] * B[2]
|
|
|
- mulx rsi, rdi, QWORD PTR [rbp+16]
|
|
|
- adox r13, r15
|
|
|
- adcx r13, rdi
|
|
|
- ; A[3] * B[3]
|
|
|
- mulx r15, rdi, QWORD PTR [rbp+24]
|
|
|
- adox r14, rsi
|
|
|
- adcx r14, rdi
|
|
|
- adox r15, rbx
|
|
|
- adcx r15, rbx ; the 512-bit product is now in r8 (lowest) .. r15 (highest)
|
|
|
- ; Start Reduction
|
|
|
- ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- ; a[0]-a[3] + (a[0] * 2) << 192
|
|
|
- mov rdi, r8
|
|
|
- lea rdx, QWORD PTR [r11+2*r8]
|
|
|
- mov rax, r9
|
|
|
- mov rbp, r10
|
|
|
- mov rsi, r10
|
|
|
- ; a[0]-a[2] << 32
|
|
|
- shl r8, 32
|
|
|
- shld rsi, rax, 32
|
|
|
- shld r9, rdi, 32
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- sub rdx, r8
|
|
|
- ; + a[0]-a[2] << 32 << 64
|
|
|
- add rax, r8
|
|
|
- adc rbp, r9
|
|
|
- adc rdx, rsi ; mu is now held in rdi (lowest), rax, rbp, rdx (highest)
|
|
|
- ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
|
|
|
- xor rsi, rsi
|
|
|
- ; a += mu << 256
|
|
|
- add r12, rdi
|
|
|
- adc r13, rax
|
|
|
- adc r14, rbp
|
|
|
- adc r15, rdx
|
|
|
- sbb r8, r8 ; r8 tracks the overflow out of the top word as 0 / -1
|
|
|
- ; a += mu << 192
|
|
|
- add r11, rdi
|
|
|
- adc r12, rax
|
|
|
- mov r9, rax ; keep a copy of mu word 1 for the final step (mov leaves CF intact)
|
|
|
- adc r13, rbp
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- sbb r8, 0
|
|
|
- ; mu <<= 32
|
|
|
- shld rsi, rdx, 32
|
|
|
- shld rdx, rbp, 32
|
|
|
- shld rbp, rax, 32
|
|
|
- shld rax, rdi, 32
|
|
|
- shl rdi, 32
|
|
|
- ; a -= (mu << 32) << 192
|
|
|
- sub r11, rdi
|
|
|
- sbb r12, rax
|
|
|
- sbb r13, rbp
|
|
|
- sbb r14, rdx
|
|
|
- sbb r15, rsi
|
|
|
- adc r8, 0
|
|
|
- ; a += (mu << 32) << 64
|
|
|
- sub r9, rdi ; NOTE(review): sub, not add; r9/r10/r11 are never stored, so only the CF feeding the adc chain matters here - confirm intent
|
|
|
- adc r10, rax
|
|
|
- adc r11, rbp
|
|
|
- adc r12, rdx
|
|
|
- adc r13, rsi
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- sbb r8, 0
|
|
|
- mov rax, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word
|
|
|
- ; mask m and sub from result if overflow
|
|
|
- ; m[0] = -1 & mask = mask
|
|
|
- ; m[2] = 0 & mask = 0
|
|
|
- mov edi, r8d ; rdi = low 32 bits of r8, zero-extended (the masked second modulus word)
|
|
|
- and rax, r8
|
|
|
- sub r12, r8 ; result = upper four words minus the masked modulus
|
|
|
- sbb r13, rdi
|
|
|
- mov QWORD PTR [rcx], r12
|
|
|
- sbb r14, 0
|
|
|
- mov QWORD PTR [rcx+8], r13
|
|
|
- sbb r15, rax
|
|
|
- mov QWORD PTR [rcx+16], r14
|
|
|
- mov QWORD PTR [rcx+24], r15
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- ret
|
|
|
-sp_256_mont_mul_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
|
|
|
-; *
|
|
|
-; * r Result of squaring.
|
|
|
-; * a Number to square in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; * mp Montgomery multiplier.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_sqr_avx2_4 PROC ; rcx = r, rdx = a; r8 and r9 are overwritten before they are read, so m/mp are unused
|
|
|
- push r12 ; r12-r15, rdi, rsi, rbx are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov rax, rdx ; rax = a (rdx is the implicit mulx multiplicand)
|
|
|
- xor r8, r8 ; also clears CF and OF for the adcx/adox chains below
|
|
|
- mov rdx, QWORD PTR [rax] ; A[0] in rdx; A[1], A[2], A[3] cached in rsi, rbx, r15
|
|
|
- mov rsi, QWORD PTR [rax+8]
|
|
|
- mov rbx, QWORD PTR [rax+16]
|
|
|
- mov r15, QWORD PTR [rax+24]
|
|
|
- ; A[0] * A[1]
|
|
|
- mulx r10, r9, rsi
|
|
|
- ; A[0] * A[2]
|
|
|
- mulx r11, r8, rbx
|
|
|
- adox r10, r8
|
|
|
- ; A[0] * A[3]
|
|
|
- mulx r12, r8, r15
|
|
|
- mov rdx, rsi
|
|
|
- adox r11, r8
|
|
|
- ; A[1] * A[2]
|
|
|
- mulx rdi, r8, rbx
|
|
|
- mov rdx, r15
|
|
|
- adcx r11, r8
|
|
|
- ; A[1] * A[3]
|
|
|
- mulx r13, r8, rsi
|
|
|
- mov r15, 0 ; mov (not xor) so the live CF/OF chains are not disturbed
|
|
|
- adox r12, rdi
|
|
|
- adcx r12, r8
|
|
|
- ; A[2] * A[3]
|
|
|
- mulx r14, r8, rbx
|
|
|
- adox r13, r15
|
|
|
- adcx r13, r8
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15 ; the off-diagonal products are now summed in r9..r14
|
|
|
- ; Double with Carry Flag
|
|
|
- xor r15, r15 ; r15 = 0; clears CF and OF for the doubling (adcx) and diagonal (adox) chains
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rdx, QWORD PTR [rax]
|
|
|
- mulx rdi, r8, rdx
|
|
|
- adcx r9, r9
|
|
|
- adcx r10, r10
|
|
|
- adox r9, rdi
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rdx, QWORD PTR [rax+8]
|
|
|
- mulx rbx, rsi, rdx
|
|
|
- adcx r11, r11
|
|
|
- adox r10, rsi
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rdx, QWORD PTR [rax+16]
|
|
|
- mulx rsi, rdi, rdx
|
|
|
- adcx r12, r12
|
|
|
- adox r11, rbx
|
|
|
- adcx r13, r13
|
|
|
- adox r12, rdi
|
|
|
- adcx r14, r14
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rdx, QWORD PTR [rax+24]
|
|
|
- mulx rbx, rdi, rdx
|
|
|
- adox r13, rsi
|
|
|
- adcx r15, r15
|
|
|
- adox r14, rdi
|
|
|
- adox r15, rbx ; the 512-bit square is now in r8 (lowest) .. r15 (highest)
|
|
|
- ; Start Reduction
|
|
|
- ; mu = a[0]-a[3] + a[0]-a[2] << 32 << 64 + (a[0] * 2) << 192
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- ; a[0]-a[3] + (a[0] * 2) << 192
|
|
|
- mov rdi, r8
|
|
|
- lea rdx, QWORD PTR [r11+2*r8]
|
|
|
- mov rax, r9
|
|
|
- mov rsi, r10
|
|
|
- mov rbx, r10
|
|
|
- ; a[0]-a[2] << 32
|
|
|
- shl r8, 32
|
|
|
- shld rbx, rax, 32
|
|
|
- shld r9, rdi, 32
|
|
|
- ; - a[0] << 32 << 192
|
|
|
- sub rdx, r8
|
|
|
- ; + a[0]-a[2] << 32 << 64
|
|
|
- add rax, r8
|
|
|
- adc rsi, r9
|
|
|
- adc rdx, rbx ; mu is now held in rdi (lowest), rax, rsi, rdx (highest)
|
|
|
- ; a += (mu << 256) - (mu << 224) + (mu << 192) + (mu << 96) - mu
|
|
|
- xor rbx, rbx
|
|
|
- ; a += mu << 256
|
|
|
- add r12, rdi
|
|
|
- adc r13, rax
|
|
|
- adc r14, rsi
|
|
|
- adc r15, rdx
|
|
|
- sbb r8, r8 ; r8 tracks the overflow out of the top word as 0 / -1
|
|
|
- ; a += mu << 192
|
|
|
- add r11, rdi
|
|
|
- adc r12, rax
|
|
|
- mov r9, rax ; keep a copy of mu word 1 for the final step (mov leaves CF intact)
|
|
|
- adc r13, rsi
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- sbb r8, 0
|
|
|
- ; mu <<= 32
|
|
|
- shld rbx, rdx, 32
|
|
|
- shld rdx, rsi, 32
|
|
|
- shld rsi, rax, 32
|
|
|
- shld rax, rdi, 32
|
|
|
- shl rdi, 32
|
|
|
- ; a -= (mu << 32) << 192
|
|
|
- sub r11, rdi
|
|
|
- sbb r12, rax
|
|
|
- sbb r13, rsi
|
|
|
- sbb r14, rdx
|
|
|
- sbb r15, rbx
|
|
|
- adc r8, 0
|
|
|
- ; a += (mu << 32) << 64
|
|
|
- sub r9, rdi ; NOTE(review): sub, not add; r9/r10/r11 are never stored, so only the CF feeding the adc chain matters here - confirm intent
|
|
|
- adc r10, rax
|
|
|
- adc r11, rsi
|
|
|
- adc r12, rdx
|
|
|
- adc r13, rbx
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- sbb r8, 0
|
|
|
- mov rax, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word
|
|
|
- ; mask m and sub from result if overflow
|
|
|
- ; m[0] = -1 & mask = mask
|
|
|
- ; m[2] = 0 & mask = 0
|
|
|
- mov edi, r8d ; rdi = low 32 bits of r8, zero-extended (the masked second modulus word)
|
|
|
- and rax, r8
|
|
|
- sub r12, r8 ; result = upper four words minus the masked modulus
|
|
|
- sbb r13, rdi
|
|
|
- mov QWORD PTR [rcx], r12
|
|
|
- sbb r14, 0
|
|
|
- mov QWORD PTR [rcx+8], r13
|
|
|
- sbb r15, rax
|
|
|
- mov QWORD PTR [rcx+16], r14
|
|
|
- mov QWORD PTR [rcx+24], r15
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_sqr_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting.
|
|
|
-; *
|
|
|
-; * r A single precision number representing condition subtract result.
|
|
|
-; * a A single precision number to subtract from.
|
|
|
-; * b A single precision number to subtract.
|
|
|
-; * m Mask value to apply.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_cond_sub_avx2_4 PROC ; rcx = r, rdx = a, r8 = b, r9 = mask m; returns the borrow in rax as 0 / -1
|
|
|
- push r12 ; r12-r15, rdi, rsi are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov r14, QWORD PTR [r8] ; load the four words of b
|
|
|
- mov r15, QWORD PTR [r8+8]
|
|
|
- mov rdi, QWORD PTR [r8+16]
|
|
|
- mov rsi, QWORD PTR [r8+24]
|
|
|
- and r14, r9 ; b & m: b is kept when m is all-ones and zeroed when m is 0
|
|
|
- and r15, r9
|
|
|
- and rdi, r9
|
|
|
- and rsi, r9
|
|
|
- mov r10, QWORD PTR [rdx] ; load the four words of a
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- mov r13, QWORD PTR [rdx+24]
|
|
|
- sub r10, r14 ; 256-bit a - (b & m), borrow chained through sbb
|
|
|
- sbb r11, r15
|
|
|
- sbb r12, rdi
|
|
|
- sbb r13, rsi
|
|
|
- mov QWORD PTR [rcx], r10 ; the stores leave CF intact for the sbb below
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], r13
|
|
|
- sbb rax, rax ; rax = -1 if the subtraction borrowed, else 0
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_cond_sub_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 256 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_reduce_order_avx2_4 PROC ; rcx = a (reads eight words, writes the low four), rdx = m, r8 = mp
|
|
|
- push r12 ; r12-r15, rdi, rsi, rbx are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov rax, rcx ; rax = a (rcx is reused as a mulx high-word destination)
|
|
|
- mov r10, rdx ; r10 = m (rdx is the implicit mulx multiplicand)
|
|
|
- mov r11, r8 ; r11 = mp (r8 is reused as a mulx low-word destination)
|
|
|
- mov r14, QWORD PTR [rax] ; a[0..3] -> r14, r15, rdi, rsi
|
|
|
- mov r15, QWORD PTR [rax+8]
|
|
|
- mov rdi, QWORD PTR [rax+16]
|
|
|
- mov rsi, QWORD PTR [rax+24]
|
|
|
- xor r13, r13 ; r13 = carry between rounds, starts at 0
|
|
|
- xor r12, r12 ; r12 = constant 0; also clears CF and OF for the adcx/adox chains
|
|
|
- ; a[0-4] += m[0-3] * mu = m[0-3] * (a[0] * mp)
|
|
|
- mov rbx, QWORD PTR [rax+32]
|
|
|
- ; mu = a[0] * mp
|
|
|
- mov rdx, r14
|
|
|
- mulx rcx, rdx, r11 ; rdx = low 64 bits of the product; the high half in rcx is discarded
|
|
|
- ; a[0] += m[0] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10]
|
|
|
- adcx r14, r8
|
|
|
- ; a[1] += m[1] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+8]
|
|
|
- adox r15, r9
|
|
|
- adcx r15, r8
|
|
|
- ; a[2] += m[2] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10+16]
|
|
|
- adox rdi, rcx
|
|
|
- adcx rdi, r8
|
|
|
- ; a[3] += m[3] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+24]
|
|
|
- adox rsi, r9
|
|
|
- adcx rsi, r8
|
|
|
- ; a[4] += carry
|
|
|
- adox rbx, rcx
|
|
|
- adcx rbx, r12
|
|
|
- ; carry
|
|
|
- adox r13, r12
|
|
|
- adcx r13, r12
|
|
|
- ; a[1-5] += m[0-3] * mu = m[0-3] * (a[1] * mp)
|
|
|
- mov r14, QWORD PTR [rax+40] ; r14 is reused for a[5]
|
|
|
- ; mu = a[1] * mp
|
|
|
- mov rdx, r15
|
|
|
- mulx rcx, rdx, r11
|
|
|
- ; a[1] += m[0] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10]
|
|
|
- adcx r15, r8
|
|
|
- ; a[2] += m[1] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+8]
|
|
|
- adox rdi, r9
|
|
|
- adcx rdi, r8
|
|
|
- ; a[3] += m[2] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10+16]
|
|
|
- adox rsi, rcx
|
|
|
- adcx rsi, r8
|
|
|
- ; a[4] += m[3] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+24]
|
|
|
- adox rbx, r9
|
|
|
- adcx rbx, r8
|
|
|
- ; a[5] += carry
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r13
|
|
|
- mov r13, r12 ; reset the carry word to 0 without touching the flags
|
|
|
- ; carry
|
|
|
- adox r13, r12
|
|
|
- adcx r13, r12
|
|
|
- ; a[2-6] += m[0-3] * mu = m[0-3] * (a[2] * mp)
|
|
|
- mov r15, QWORD PTR [rax+48] ; r15 is reused for a[6]
|
|
|
- ; mu = a[2] * mp
|
|
|
- mov rdx, rdi
|
|
|
- mulx rcx, rdx, r11
|
|
|
- ; a[2] += m[0] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10]
|
|
|
- adcx rdi, r8
|
|
|
- ; a[3] += m[1] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+8]
|
|
|
- adox rsi, r9
|
|
|
- adcx rsi, r8
|
|
|
- ; a[4] += m[2] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10+16]
|
|
|
- adox rbx, rcx
|
|
|
- adcx rbx, r8
|
|
|
- ; a[5] += m[3] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+24]
|
|
|
- adox r14, r9
|
|
|
- adcx r14, r8
|
|
|
- ; a[6] += carry
|
|
|
- adox r15, rcx
|
|
|
- adcx r15, r13
|
|
|
- mov r13, r12 ; reset the carry word to 0 without touching the flags
|
|
|
- ; carry
|
|
|
- adox r13, r12
|
|
|
- adcx r13, r12
|
|
|
- ; a[3-7] += m[0-3] * mu = m[0-3] * (a[3] * mp)
|
|
|
- mov rdi, QWORD PTR [rax+56] ; rdi is reused for a[7]
|
|
|
- ; mu = a[3] * mp
|
|
|
- mov rdx, rsi
|
|
|
- mulx rcx, rdx, r11
|
|
|
- ; a[3] += m[0] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10]
|
|
|
- adcx rsi, r8
|
|
|
- ; a[4] += m[1] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+8]
|
|
|
- adox rbx, r9
|
|
|
- adcx rbx, r8
|
|
|
- ; a[5] += m[2] * mu
|
|
|
- mulx r9, r8, QWORD PTR [r10+16]
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r8
|
|
|
- ; a[6] += m[3] * mu
|
|
|
- mulx rcx, r8, QWORD PTR [r10+24]
|
|
|
- adox r15, r9
|
|
|
- adcx r15, r8
|
|
|
- ; a[7] += carry
|
|
|
- adox rdi, rcx
|
|
|
- adcx rdi, r13
|
|
|
- mov r13, r12 ; reset the carry word to 0 without touching the flags
|
|
|
- ; carry
|
|
|
- adox r13, r12
|
|
|
- adcx r13, r12
|
|
|
- ; Subtract mod if carry
|
|
|
- neg r13 ; turn the final carry into a 0 / all-ones mask
|
|
|
- mov r8, 17562291160714782033 ; 0xF3B9CAC2FC632551 - hard-coded here, not loaded from m
|
|
|
- mov r9, 13611842547513532036 ; 0xBCE6FAADA7179E84 - hard-coded here, not loaded from m
|
|
|
- mov rdx, 18446744069414584320 ; 0xFFFFFFFF00000000 - hard-coded here, not loaded from m
|
|
|
- and r8, r13
|
|
|
- and r9, r13
|
|
|
- and rdx, r13
|
|
|
- sub rbx, r8 ; subtract the masked words {r8, r9, mask (an all-ones word), rdx} from a[4..7]
|
|
|
- sbb r14, r9
|
|
|
- sbb r15, r13
|
|
|
- sbb rdi, rdx
|
|
|
- mov QWORD PTR [rax], rbx ; the reduced value is written back to a[0..3]
|
|
|
- mov QWORD PTR [rax+8], r14
|
|
|
- mov QWORD PTR [rax+16], r15
|
|
|
- mov QWORD PTR [rax+24], rdi
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_reduce_order_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
|
|
|
-; *
|
|
|
-; * r Result of division by 2.
|
|
|
-; * a Number to divide.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mont_div2_avx2_4 PROC ; rcx = r, rdx = a; same instruction sequence as sp_256_mont_div2_4 (no AVX2 instructions are used)
|
|
|
- push r12 ; r12/r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx] ; load the four 64-bit words of a into rax, r8, r9, r10
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov r12, 18446744069414584321 ; 0xFFFFFFFF00000001, hard-coded top modulus word
|
|
|
- mov r13, rax
|
|
|
- and r13, 1 ; isolate the low bit of a
|
|
|
- neg r13 ; r13 = all-ones if a is odd, else 0
|
|
|
- mov r11d, r13d ; r11 = low 32 bits of the mask, zero-extended (0x00000000FFFFFFFF or 0)
|
|
|
- and r12, r13 ; r12 = top modulus word & mask
|
|
|
- add rax, r13 ; add the masked modulus words {mask, r11, 0, r12} so the sum is even
|
|
|
- adc r8, r11
|
|
|
- adc r9, 0
|
|
|
- adc r10, r12
|
|
|
- mov r13, 0 ; mov (not xor) so CF from the adc chain survives
|
|
|
- adc r13, 0 ; r13 = carry out of the addition (bit 256 of the sum)
|
|
|
- shrd rax, r8, 1 ; shift the 257-bit value right by one, word by word
|
|
|
- shrd r8, r9, 1
|
|
|
- shrd r9, r10, 1
|
|
|
- shrd r10, r13, 1
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mont_div2_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-; /* Touch each possible entry that could be being copied.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of entry to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_get_entry_64_4 PROC ; rcx = r, rdx = table, r8d = idx; reads every scanned entry regardless of idx
|
|
|
- sub rsp, 96 ; room to save xmm6-xmm11, callee-saved in the Microsoft x64 convention
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- ; From entry 1
|
|
|
- mov rax, 1
|
|
|
- movd xmm9, r8d ; xmm9 = idx (broadcast to all lanes below)
|
|
|
- add rdx, 64 ; skip entry 0; entries are 64 bytes apart
|
|
|
- movd xmm11, eax ; xmm11 = 1 (broadcast to all lanes below)
|
|
|
- mov rax, 63 ; loop count: entries 1..63 are scanned
|
|
|
- pshufd xmm11, xmm11, 0
|
|
|
- pshufd xmm9, xmm9, 0
|
|
|
- pxor xmm10, xmm10 ; redundant: xmm10 is overwritten by the movdqa just before the loop
|
|
|
- pxor xmm0, xmm0 ; xmm0-xmm3 accumulate the selected entry; they stay zero if no index matches
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- movdqa xmm10, xmm11 ; xmm10 = running entry number, starting at 1
|
|
|
-L_256_get_entry_64_4_start_0:
|
|
|
- movdqa xmm8, xmm10
|
|
|
- paddd xmm10, xmm11 ; advance the entry number for the next iteration
|
|
|
- pcmpeqd xmm8, xmm9 ; xmm8 = all-ones when this entry number equals idx, else 0
|
|
|
- movdqu xmm4, [rdx] ; the full 64-byte entry
|
|
|
- movdqu xmm5, [rdx+16]
|
|
|
- movdqu xmm6, [rdx+32]
|
|
|
- movdqu xmm7, [rdx+48]
|
|
|
- add rdx, 64
|
|
|
- pand xmm4, xmm8 ; keep the data only for the matching entry
|
|
|
- pand xmm5, xmm8
|
|
|
- pand xmm6, xmm8
|
|
|
- pand xmm7, xmm8
|
|
|
- por xmm0, xmm4
|
|
|
- por xmm1, xmm5
|
|
|
- por xmm2, xmm6
|
|
|
- por xmm3, xmm7
|
|
|
- dec rax
|
|
|
- jnz L_256_get_entry_64_4_start_0
|
|
|
- movdqu [rcx], xmm0 ; first 32 bytes of the entry go to offset 0 of r
|
|
|
- movdqu [rcx+16], xmm1
|
|
|
- movdqu [rcx+64], xmm2 ; second 32 bytes of the entry go to offset 64 of r
|
|
|
- movdqu [rcx+80], xmm3
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- add rsp, 96
|
|
|
- ret
|
|
|
-sp_256_get_entry_64_4 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Touch each possible entry that could be being copied.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of entry to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_get_entry_64_avx2_4 PROC ; rcx = r, rdx = table, r8d = idx; reads every scanned entry regardless of idx
|
|
|
- sub rsp, 32 ; room to save xmm6/xmm7, callee-saved in the Microsoft x64 convention
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- mov rax, 1
|
|
|
- movd xmm5, r8d ; xmm5 = idx
|
|
|
- add rdx, 64 ; skip entry 0; entries are 64 bytes apart
|
|
|
- movd xmm7, eax ; xmm7 = 1
|
|
|
- mov rax, 64 ; NOTE(review): scans entries 1..64 while sp_256_get_entry_64_4 scans 1..63 - confirm the table holds an entry 64
|
|
|
- vpxor ymm6, ymm6, ymm6 ; all-zero permute indices
|
|
|
- vpermd ymm5, ymm6, ymm5 ; broadcast element 0 (idx) to all eight lanes
|
|
|
- vpermd ymm7, ymm6, ymm7 ; broadcast element 0 (1) to all eight lanes
|
|
|
- vpxor ymm0, ymm0, ymm0 ; ymm0/ymm1 accumulate the selected entry; they stay zero if no index matches
|
|
|
- vpxor ymm1, ymm1, ymm1
|
|
|
- vmovdqa ymm6, ymm7 ; ymm6 = running entry number, starting at 1
|
|
|
-L_256_get_entry_64_avx2_4_start:
|
|
|
- vpcmpeqd ymm4, ymm6, ymm5 ; ymm4 = all-ones when this entry number equals idx, else 0
|
|
|
- vpaddd ymm6, ymm6, ymm7 ; advance the entry number for the next iteration
|
|
|
- vmovupd ymm2, YMMWORD PTR [rdx] ; the full 64-byte entry
|
|
|
- vmovupd ymm3, YMMWORD PTR [rdx+32]
|
|
|
- add rdx, 64
|
|
|
- vpand ymm2, ymm2, ymm4 ; keep the data only for the matching entry
|
|
|
- vpand ymm3, ymm3, ymm4
|
|
|
- vpor ymm0, ymm0, ymm2
|
|
|
- vpor ymm1, ymm1, ymm3
|
|
|
- dec rax
|
|
|
- jnz L_256_get_entry_64_avx2_4_start
|
|
|
- vmovupd YMMWORD PTR [rcx], ymm0 ; first 32 bytes of the entry go to offset 0 of r
|
|
|
- vmovupd YMMWORD PTR [rcx+64], ymm1 ; second 32 bytes of the entry go to offset 64 of r
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- add rsp, 32
|
|
|
- vzeroupper ; clear the dirty upper YMM halves so the caller's legacy-SSE code pays no transition penalty
|
|
|
- ret
|
|
|
-sp_256_get_entry_64_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-; /* Touch each possible entry that could be being copied.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of entry to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_get_entry_65_4 PROC ; rcx = r, rdx = table, r8d = idx; reads every scanned entry regardless of idx
|
|
|
- sub rsp, 96 ; room to save xmm6-xmm11, callee-saved in the Microsoft x64 convention
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- ; From entry 1
|
|
|
- mov rax, 1
|
|
|
- movd xmm9, r8d ; xmm9 = idx (broadcast to all lanes below)
|
|
|
- add rdx, 64 ; skip entry 0; entries are 64 bytes apart
|
|
|
- movd xmm11, eax ; xmm11 = 1 (broadcast to all lanes below)
|
|
|
- mov rax, 64 ; loop count: entries 1..64 are scanned
|
|
|
- pshufd xmm11, xmm11, 0
|
|
|
- pshufd xmm9, xmm9, 0
|
|
|
- pxor xmm10, xmm10 ; redundant: xmm10 is overwritten by the movdqa just before the loop
|
|
|
- pxor xmm0, xmm0 ; xmm0-xmm3 accumulate the selected entry; they stay zero if no index matches
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- movdqa xmm10, xmm11 ; xmm10 = running entry number, starting at 1
|
|
|
-L_256_get_entry_65_4_start_0:
|
|
|
- movdqa xmm8, xmm10
|
|
|
- paddd xmm10, xmm11 ; advance the entry number for the next iteration
|
|
|
- pcmpeqd xmm8, xmm9 ; xmm8 = all-ones when this entry number equals idx, else 0
|
|
|
- movdqu xmm4, [rdx] ; the full 64-byte entry
|
|
|
- movdqu xmm5, [rdx+16]
|
|
|
- movdqu xmm6, [rdx+32]
|
|
|
- movdqu xmm7, [rdx+48]
|
|
|
- add rdx, 64
|
|
|
- pand xmm4, xmm8 ; keep the data only for the matching entry
|
|
|
- pand xmm5, xmm8
|
|
|
- pand xmm6, xmm8
|
|
|
- pand xmm7, xmm8
|
|
|
- por xmm0, xmm4
|
|
|
- por xmm1, xmm5
|
|
|
- por xmm2, xmm6
|
|
|
- por xmm3, xmm7
|
|
|
- dec rax
|
|
|
- jnz L_256_get_entry_65_4_start_0
|
|
|
- movdqu [rcx], xmm0 ; first 32 bytes of the entry go to offset 0 of r
|
|
|
- movdqu [rcx+16], xmm1
|
|
|
- movdqu [rcx+64], xmm2 ; second 32 bytes of the entry go to offset 64 of r
|
|
|
- movdqu [rcx+80], xmm3
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- add rsp, 96
|
|
|
- ret
|
|
|
-sp_256_get_entry_65_4 ENDP
|
|
|
-_text ENDS
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible entry that could be being copied.
;  *
;  * AVX2 variant: reads every candidate 64-byte entry and keeps only the one
;  * whose index matches, so the access pattern does not depend on idx.
;  *
;  * The scan starts at entry 1 and covers entries 1..64.  The loop count is
;  * 64 (not 65) so that no load is issued for an entry index of 65, which
;  * would lie past a 65-entry table.  An idx outside 1..64 matches nothing
;  * and leaves the copied fields zero.
;  *
;  * The first 32 bytes of the selected entry are written to r+0 and the
;  * second 32 bytes to r+64.
;  *
;  * Win64: rcx = r, rdx = table, r8d = idx.  xmm6/xmm7 are callee-saved.
;  *
;  * r      Point to copy into.
;  * table  Table - start of the entries to access
;  * idx    Index of entry to retrieve.
;  */
_text SEGMENT READONLY PARA
sp_256_get_entry_65_avx2_4 PROC
        sub     rsp, 32
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        mov     rax, 1
        movd    xmm5, r8d
        add     rdx, 64                 ; skip entry 0
        movd    xmm7, eax
        mov     rax, 64                 ; entries 1..64
        vpxor   ymm6, ymm6, ymm6        ; all-zero permute control
        vpermd  ymm5, ymm6, ymm5        ; ymm5 = idx in every lane
        vpermd  ymm7, ymm6, ymm7        ; ymm7 = 1 in every lane (index step)
        vpxor   ymm0, ymm0, ymm0        ; ymm0/ymm1 accumulate the result
        vpxor   ymm1, ymm1, ymm1
        vmovdqa ymm6, ymm7              ; ymm6 = running index, starts at 1
L_256_get_entry_65_avx2_4_start:
        vpcmpeqd ymm4, ymm6, ymm5       ; all-ones mask when index == idx
        vpaddd  ymm6, ymm6, ymm7
        vmovupd ymm2, YMMWORD PTR [rdx]
        vmovupd ymm3, YMMWORD PTR [rdx+32]
        add     rdx, 64
        vpand   ymm2, ymm2, ymm4
        vpand   ymm3, ymm3, ymm4
        vpor    ymm0, ymm0, ymm2
        vpor    ymm1, ymm1, ymm3
        dec     rax
        jnz     L_256_get_entry_65_avx2_4_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+64], ymm1
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        add     rsp, 32
        ret
sp_256_get_entry_65_avx2_4 ENDP
_text ENDS
ENDIF
|
|
|
-ENDIF
|
|
|
; /* Add 1 to a. (a = a + 1)
;  *
;  * a is four 64-bit words, least significant first.  The carry ripples
;  * through all four words; a carry out of the top word is discarded.
;  *
;  * Win64: rcx = a.
;  *
;  * a  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_256_add_one_4 PROC
        mov     r8, QWORD PTR [rcx]
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx+16]
        mov     r11, QWORD PTR [rcx+24]
        add     r8, 1
        adc     r9, 0
        adc     r10, 0
        adc     r11, 0
        mov     QWORD PTR [rcx], r8
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        ret
sp_256_add_one_4 ENDP
_text ENDS
|
|
|
; /* Read big endian unsigned byte array into r.
;  * Uses the bswap instruction.
;  *
;  * Bytes are consumed from the end of a (least significant) towards the
;  * start: 64 bytes at a time, then 8 bytes at a time, then the remaining
;  * 1..7 leading bytes are packed into one word.  Any words of r below
;  * r+32 that were not written are set to zero.
;  *
;  * Win64: rcx = r, rdx = size, r8 = a, r9 = n.
;  * The size argument is not read; the zero fill stops at r + 32 bytes.
;  * Counts are compared as signed values.
;  *
;  * r     A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a     Byte array.
;  * n     Number of bytes in array to read.
;  */
_text SEGMENT READONLY PARA
sp_256_from_bin_bswap PROC
        push    r12
        push    r13
        lea     r11, [r8+r9]            ; r11 = one past the last input byte
        lea     r12, [rcx+32]           ; r12 = end of the 32-byte output
        xor     r13, r13                ; r13 = 0
        ; 64 input bytes -> 8 output words per pass
L_256_from_bin_bswap_blk64:
        cmp     r9, 63
        jle     L_256_from_bin_bswap_word8
        sub     r11, 64
        mov     rax, QWORD PTR [r11+56]
        mov     r10, QWORD PTR [r11+48]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        mov     rax, QWORD PTR [r11+40]
        mov     r10, QWORD PTR [r11+32]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        mov     rax, QWORD PTR [r11+24]
        mov     r10, QWORD PTR [r11+16]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        mov     rax, QWORD PTR [r11+8]
        mov     r10, QWORD PTR [r11]
        bswap   rax
        bswap   r10
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
        jmp     L_256_from_bin_bswap_blk64
        ; 8 input bytes -> 1 output word per pass
L_256_from_bin_bswap_word8:
        cmp     r9, 7
        jle     L_256_from_bin_bswap_tail
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
        jmp     L_256_from_bin_bswap_word8
        ; leading bytes that do not fill a whole word
L_256_from_bin_bswap_tail:
        test    r9, r9
        jz      L_256_from_bin_bswap_pad
        xor     r10d, r10d              ; r10 = word being assembled
L_256_from_bin_bswap_tail_byte:
        shl     r10, 8
        movzx   eax, BYTE PTR [r8]      ; next most significant byte
        or      r10, rax
        inc     r8
        dec     r9
        jg      L_256_from_bin_bswap_tail_byte
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
        ; zero the unwritten high words of r
L_256_from_bin_bswap_pad:
        cmp     rcx, r12
        jge     L_256_from_bin_bswap_done
        mov     QWORD PTR [rcx], r13
        add     rcx, 8
        jmp     L_256_from_bin_bswap_pad
L_256_from_bin_bswap_done:
        pop     r13
        pop     r12
        ret
sp_256_from_bin_bswap ENDP
_text ENDS
|
|
|
IFNDEF NO_MOVBE_SUPPORT
; /* Read big endian unsigned byte array into r.
;  * Uses the movbe instruction which is an optional instruction.
;  *
;  * Bytes are consumed from the end of a (least significant) towards the
;  * start: 64 bytes at a time, then 8 bytes at a time, then the remaining
;  * 1..7 leading bytes are packed into one word.  Any words of r below
;  * r+32 that were not written are set to zero.
;  *
;  * Win64: rcx = r, rdx = size, r8 = a, r9 = n.
;  * The size argument is not read; the zero fill stops at r + 32 bytes.
;  * Counts are compared as signed values.
;  *
;  * r     A single precision integer.
;  * size  Maximum number of bytes to convert
;  * a     Byte array.
;  * n     Number of bytes in array to read.
;  */
_text SEGMENT READONLY PARA
sp_256_from_bin_movbe PROC
        push    r12
        lea     r11, [r8+r9]            ; r11 = one past the last input byte
        lea     r12, [rcx+32]           ; r12 = end of the 32-byte output
        ; 64 input bytes -> 8 output words per pass
L_256_from_bin_movbe_blk64:
        cmp     r9, 63
        jle     L_256_from_bin_movbe_word8
        sub     r11, 64
        movbe   rax, QWORD PTR [r11+56]
        movbe   r10, QWORD PTR [r11+48]
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r10
        movbe   rax, QWORD PTR [r11+40]
        movbe   r10, QWORD PTR [r11+32]
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r10
        movbe   rax, QWORD PTR [r11+24]
        movbe   r10, QWORD PTR [r11+16]
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r10
        movbe   rax, QWORD PTR [r11+8]
        movbe   r10, QWORD PTR [r11]
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r10
        add     rcx, 64
        sub     r9, 64
        jmp     L_256_from_bin_movbe_blk64
        ; 8 input bytes -> 1 output word per pass
L_256_from_bin_movbe_word8:
        cmp     r9, 7
        jle     L_256_from_bin_movbe_tail
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
        jmp     L_256_from_bin_movbe_word8
        ; leading bytes that do not fill a whole word
L_256_from_bin_movbe_tail:
        test    r9, r9
        jz      L_256_from_bin_movbe_pad
        xor     r10d, r10d              ; r10 = word being assembled
L_256_from_bin_movbe_tail_byte:
        shl     r10, 8
        movzx   eax, BYTE PTR [r8]      ; next most significant byte
        or      r10, rax
        inc     r8
        dec     r9
        jg      L_256_from_bin_movbe_tail_byte
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
        ; zero the unwritten high words of r
L_256_from_bin_movbe_pad:
        cmp     rcx, r12
        jge     L_256_from_bin_movbe_done
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        jmp     L_256_from_bin_movbe_pad
L_256_from_bin_movbe_done:
        pop     r12
        ret
sp_256_from_bin_movbe ENDP
_text ENDS
ENDIF
|
|
|
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 32
;  * Uses the bswap instruction.
;  *
;  * The most significant word r[3] becomes bytes 0..7 of a, and the least
;  * significant word r[0] becomes bytes 24..31.
;  *
;  * Win64: rcx = r, rdx = a.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  */
_text SEGMENT READONLY PARA
sp_256_to_bin_bswap_4 PROC
        ; upper half: r[3], r[2] -> a[0..15]
        mov     r9, QWORD PTR [rcx+24]
        mov     r10, QWORD PTR [rcx+16]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx], r9
        mov     QWORD PTR [rdx+8], r10
        ; lower half: r[1], r[0] -> a[16..31]
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx]
        bswap   r9
        bswap   r10
        mov     QWORD PTR [rdx+16], r9
        mov     QWORD PTR [rdx+24], r10
        ret
sp_256_to_bin_bswap_4 ENDP
_text ENDS
|
|
|
IFNDEF NO_MOVBE_SUPPORT
; /* Write r as big endian to byte array.
;  * Fixed length number of bytes written: 32
;  * Uses the movbe instruction which is optional.
;  *
;  * The most significant word r[3] becomes bytes 0..7 of a, and the least
;  * significant word r[0] becomes bytes 24..31.  The byte swap is done on
;  * the store side here.
;  *
;  * Win64: rcx = r, rdx = a.
;  *
;  * r  A single precision integer.
;  * a  Byte array.
;  */
_text SEGMENT READONLY PARA
sp_256_to_bin_movbe_4 PROC
        ; upper half: r[3], r[2] -> a[0..15]
        mov     rax, QWORD PTR [rcx+24]
        mov     r8, QWORD PTR [rcx+16]
        movbe   QWORD PTR [rdx], rax
        movbe   QWORD PTR [rdx+8], r8
        ; lower half: r[1], r[0] -> a[16..31]
        mov     rax, QWORD PTR [rcx+8]
        mov     r8, QWORD PTR [rcx]
        movbe   QWORD PTR [rdx+16], rax
        movbe   QWORD PTR [rdx+24], r8
        ret
sp_256_to_bin_movbe_4 ENDP
_text ENDS
ENDIF
|
|
|
; /* Sub b from a into a. (a -= b)
;  *
;  * Both operands are four 64-bit words, least significant first.
;  *
;  * Win64: rcx = a, rdx = b.
;  * Returns in rax: 0 when no borrow left the top word, all ones (-1) when
;  * the subtraction borrowed.
;  *
;  * a  A single precision integer and result.
;  * b  A single precision integer.
;  */
_text SEGMENT READONLY PARA
sp_256_sub_in_place_4 PROC
        mov     r8, QWORD PTR [rcx]
        mov     r9, QWORD PTR [rcx+8]
        mov     r10, QWORD PTR [rcx+16]
        mov     r11, QWORD PTR [rcx+24]
        sub     r8, QWORD PTR [rdx]
        sbb     r9, QWORD PTR [rdx+8]
        sbb     r10, QWORD PTR [rdx+16]
        sbb     r11, QWORD PTR [rdx+24]
        ; mov leaves the borrow flag intact for the final sbb
        mov     QWORD PTR [rcx], r8
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        sbb     rax, rax                ; rax = 0 - borrow
        ret
sp_256_sub_in_place_4 ENDP
_text ENDS
|
|
|
; /* Mul a by digit b into r. (r = a * b)
;  *
;  * a is four 64-bit words, least significant first; the product is five
;  * words and is written to r[0..4].  Each word of a is read before the
;  * matching word of r is stored.
;  *
;  * Win64: rcx = r, rdx = a, r8 = b.
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision digit.
;  */
_text SEGMENT READONLY PARA
sp_256_mul_d_4 PROC
        mov     r9, rdx                 ; r9 = a (rdx is consumed by mul)
        ; A[0] * B
        mov     rax, r8
        mul     QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        mov     r10, rdx                ; r10 = word carried into the next column
        ; A[1] * B
        mov     rax, r8
        mul     QWORD PTR [r9+8]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+8], rax
        mov     r10, rdx
        ; A[2] * B
        mov     rax, r8
        mul     QWORD PTR [r9+16]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+16], rax
        mov     r10, rdx
        ; A[3] * B
        mov     rax, r8
        mul     QWORD PTR [r9+24]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+24], rax
        mov     QWORD PTR [rcx+32], rdx
        ret
sp_256_mul_d_4 ENDP
_text ENDS
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Mul a by digit b into r. (r = a * b)
;  *
;  * a is four 64-bit words, least significant first; the product is five
;  * words and is written to r[0..4].  Uses mulx, which leaves the flags
;  * alone, so a single add/adc chain carries across the partial products.
;  *
;  * Win64: rcx = r, rdx = a, r8 = b.
;  *
;  * r  A single precision integer.
;  * a  A single precision integer.
;  * b  A single precision digit.
;  */
_text SEGMENT READONLY PARA
sp_256_mul_d_avx2_4 PROC
        mov     r9, rdx                 ; r9 = a
        mov     rdx, r8                 ; rdx = b, implicit mulx multiplier
        ; A[0] * B
        mulx    r10, rax, QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        ; A[1] * B
        mulx    r11, rax, QWORD PTR [r9+8]
        add     rax, r10
        mov     QWORD PTR [rcx+8], rax
        ; A[2] * B
        mulx    r10, rax, QWORD PTR [r9+16]
        adc     rax, r11
        mov     QWORD PTR [rcx+16], rax
        ; A[3] * B
        mulx    r11, rax, QWORD PTR [r9+24]
        adc     rax, r10
        adc     r11, 0
        mov     QWORD PTR [rcx+24], rax
        mov     QWORD PTR [rcx+32], r11
        ret
sp_256_mul_d_avx2_4 ENDP
_text ENDS
ENDIF
|
|
|
IFDEF _WIN64
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
;  *
;  * Win64: rcx = d1, rdx = d0, r8 = div.
;  * The quotient is returned in rax.  The hardware div instruction faults
;  * if div is zero or if the quotient does not fit in 64 bits (d1 >= div).
;  *
;  * d1   The high order half of the number to divide.
;  * d0   The low order half of the number to divide.
;  * div  The divisor.
;  * returns the result of the division.
;  */
_text SEGMENT READONLY PARA
div_256_word_asm_4 PROC
        mov     rax, rdx                ; low half of the dividend
        mov     rdx, rcx                ; high half of the dividend
        div     r8                      ; rax = quotient, rdx = remainder
        ret
div_256_word_asm_4 ENDP
_text ENDS
ENDIF
|
|
|
IFDEF HAVE_INTEL_AVX2
; One row of the schoolbook product: adds rdx * B[0..3] into acc0..acc3 and
; leaves the row's top word in top.  rbp = b.  b1 is the operand holding B[1].
; Two independent carry chains are used: adcx (CF) for the low halves and
; adox (OF) for the high halves.  The xor clears both flags.
; Scratch: rsi, rdi, r15, rbx.
sp_256_mont_mul_order_avx2_4_row MACRO acc0, acc1, acc2, acc3, top, b1
        mulx    rsi, rdi, QWORD PTR [rbp]
        xor     rbx, rbx
        adcx    acc0, rdi
        mulx    r15, rdi, b1
        adox    acc1, rsi
        adcx    acc1, rdi
        mulx    rsi, rdi, QWORD PTR [rbp+16]
        adox    acc2, r15
        adcx    acc2, rdi
        mulx    top, rdi, QWORD PTR [rbp+24]
        adox    acc3, rsi
        adcx    acc3, rdi
        adox    top, rbx
        adcx    top, rbx
ENDM

; One Montgomery reduction round.  rbx holds the per-word multiplier.
; mu = t0 * rbx (low 64 bits); mu * modulus is added into t0..t4, which
; clears t0.  cin is the carry word from the previous round; the carry out
; of this round is left in r8.
; Modulus words (least significant first):
;   17562291160714782033 = 0F3B9CAC2FC632551h
;   13611842547513532036 = 0BCE6FAADA7179E84h
;   18446744073709551615 = 0FFFFFFFFFFFFFFFFh
;   18446744069414584320 = 0FFFFFFFF00000000h
; Scratch: rax, rdx, rsi, rdi, rbp.
sp_256_mont_mul_order_avx2_4_reduce MACRO t0, t1, t2, t3, t4, cin
        mov     rdx, rbx
        imul    rdx, t0
        mov     rdi, 17562291160714782033
        xor     rbp, rbp
        mulx    rax, rsi, rdi
        mov     rdi, 13611842547513532036
        adcx    t0, rsi
        adox    t1, rax
        mulx    rax, rsi, rdi
        mov     rdi, 18446744073709551615
        adcx    t1, rsi
        adox    t2, rax
        mulx    rax, rsi, rdi
        mov     rdi, 18446744069414584320
        adcx    t2, rsi
        adox    t3, rax
        mulx    rax, rsi, rdi
        adcx    t3, rsi
        adox    t4, rax
        adcx    t4, cin
        mov     r8, rbp
        ; carry
        adox    r8, rbp
        adcx    r8, rbp
ENDM

; /* Multiply two Montgomery form numbers mod the modulus (prime).
;  * (r = a * b mod m)
;  *
;  * The modulus is built into the code as immediates (presumably the P-256
;  * group order, going by the routine name).  The 512-bit product is formed
;  * in r8..r15, reduced one word at a time, and the modulus is subtracted
;  * once more only when the reduction carried out of 256 bits.
;  *
;  * Win64: rcx = r, rdx = a, r8 = b.
;  * Callee-saved rbp, r12-r15, rdi, rsi, rbx are pushed and restored.
;  *
;  * r  Result of multiplication.
;  * a  First number to multiply in Montgomery form.
;  * b  Second number to multiply in Montgomery form.
;  */
_text SEGMENT READONLY PARA
sp_256_mont_mul_order_avx2_4 PROC
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        mov     rbp, r8                 ; rbp = b
        mov     rax, rdx                ; rax = a
        mov     rdx, QWORD PTR [rax]
        mov     r14, QWORD PTR [rbp+8]  ; B[1] cached for rows 0..2
        ; Row 0: A[0] * B[0..3] -> r8..r12
        mulx    r9, r8, QWORD PTR [rbp]
        xor     rbx, rbx
        mulx    r10, rdi, r14
        adcx    r9, rdi
        mulx    r11, rdi, QWORD PTR [rbp+16]
        adcx    r10, rdi
        mulx    r12, rdi, QWORD PTR [rbp+24]
        adcx    r11, rdi
        adcx    r12, rbx
        ; Row 1: A[1] * B[0..3] added into r9..r12, top word in r13
        mov     rdx, QWORD PTR [rax+8]
        sp_256_mont_mul_order_avx2_4_row r9, r10, r11, r12, r13, r14
        ; Row 2: A[2] * B[0..3] added into r10..r13, top word in r14
        ; (r14 is read as B[1] before the macro overwrites it)
        mov     rdx, QWORD PTR [rax+16]
        sp_256_mont_mul_order_avx2_4_row r10, r11, r12, r13, r14, r14
        ; Row 3: A[3] * B[0..3] added into r11..r14, top word in r15
        mov     rdx, QWORD PTR [rax+24]
        sp_256_mont_mul_order_avx2_4_row r11, r12, r13, r14, r15, <QWORD PTR [rbp+8]>
        ; Start Reduction
        mov     rbx, 14758798090332847183 ; 0CCD1C8AAEE00BC4Fh, reduction multiplier
        ; A[0]
        sp_256_mont_mul_order_avx2_4_reduce r8, r9, r10, r11, r12, rbp
        ; A[1]
        sp_256_mont_mul_order_avx2_4_reduce r9, r10, r11, r12, r13, r8
        ; A[2]
        sp_256_mont_mul_order_avx2_4_reduce r10, r11, r12, r13, r14, r8
        ; A[3]
        sp_256_mont_mul_order_avx2_4_reduce r11, r12, r13, r14, r15, r8
        ; r8 = final carry; turn it into a mask selecting the modulus or zero
        neg     r8
        mov     rdi, 17562291160714782033
        mov     rbx, 13611842547513532036
        mov     rbp, 18446744069414584320
        and     rdi, r8
        and     rbx, r8
        and     rbp, r8
        ; result = r12..r15 - (modulus AND mask); word 2 of the modulus is all
        ; ones, so the mask itself is subtracted there
        sub     r12, rdi
        sbb     r13, rbx
        sbb     r14, r8
        sbb     r15, rbp
        mov     QWORD PTR [rcx], r12
        mov     QWORD PTR [rcx+8], r13
        mov     QWORD PTR [rcx+16], r14
        mov     QWORD PTR [rcx+24], r15
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        ret
sp_256_mont_mul_order_avx2_4 ENDP
_text ENDS
ENDIF
|
|
|
IFDEF HAVE_INTEL_AVX2
; One Montgomery reduction round.  rbx holds the per-word multiplier.
; mu = t0 * rbx (low 64 bits); mu * modulus is added into t0..t4, which
; clears t0.  cin is the carry word from the previous round; the carry out
; of this round is left in r8.
; Modulus words (least significant first):
;   17562291160714782033 = 0F3B9CAC2FC632551h
;   13611842547513532036 = 0BCE6FAADA7179E84h
;   18446744073709551615 = 0FFFFFFFFFFFFFFFFh
;   18446744069414584320 = 0FFFFFFFF00000000h
; Scratch: rax, rdx, rsi, rdi, rbp.
sp_256_mont_sqr_order_avx2_4_reduce MACRO t0, t1, t2, t3, t4, cin
        mov     rdx, rbx
        imul    rdx, t0
        mov     rdi, 17562291160714782033
        xor     rbp, rbp
        mulx    rax, rsi, rdi
        mov     rdi, 13611842547513532036
        adcx    t0, rsi
        adox    t1, rax
        mulx    rax, rsi, rdi
        mov     rdi, 18446744073709551615
        adcx    t1, rsi
        adox    t2, rax
        mulx    rax, rsi, rdi
        mov     rdi, 18446744069414584320
        adcx    t2, rsi
        adox    t3, rax
        mulx    rax, rsi, rdi
        adcx    t3, rsi
        adox    t4, rax
        adcx    t4, cin
        mov     r8, rbp
        ; carry
        adox    r8, rbp
        adcx    r8, rbp
ENDM

; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
;  *
;  * The modulus is built into the code as immediates (presumably the P-256
;  * group order, going by the routine name).  The off-diagonal products are
;  * summed once, doubled, and the squares A[i]*A[i] are added; the 512-bit
;  * result in r8..r15 is then reduced one word at a time.  The modulus is
;  * subtracted once more only when the reduction carried out of 256 bits.
;  *
;  * Win64: rcx = r, rdx = a.
;  * Callee-saved rbp, r12-r15, rdi, rsi, rbx are pushed and restored.
;  *
;  * r  Result of squaring.
;  * a  Number to square in Montgomery form.
;  */
_text SEGMENT READONLY PARA
sp_256_mont_sqr_order_avx2_4 PROC
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        mov     rax, rdx                ; rax = a
        xor     r8, r8                  ; clears CF and OF for the chains below
        mov     rdx, QWORD PTR [rax]    ; A[0]
        mov     rsi, QWORD PTR [rax+8]  ; A[1]
        mov     rbx, QWORD PTR [rax+16] ; A[2]
        mov     r15, QWORD PTR [rax+24] ; A[3]
        ; A[0] * A[1]
        mulx    r10, r9, rsi
        ; A[0] * A[2]
        mulx    r11, r8, rbx
        adox    r10, r8
        ; A[0] * A[3]
        mulx    r12, r8, r15
        mov     rdx, rsi
        adox    r11, r8
        ; A[1] * A[2]
        mulx    rdi, r8, rbx
        mov     rdx, r15
        adcx    r11, r8
        ; A[1] * A[3]
        mulx    r13, r8, rsi
        mov     r15, 0                  ; mov, not xor: both carry flags are live
        adox    r12, rdi
        adcx    r12, r8
        ; A[2] * A[3]
        mulx    r14, r8, rbx
        adox    r13, r15
        adcx    r13, r8
        adox    r14, r15
        adcx    r14, r15
        ; Double with Carry Flag
        xor     r15, r15
        ; A[0] * A[0]
        mov     rdx, QWORD PTR [rax]
        mulx    rdi, r8, rdx
        adcx    r9, r9
        adcx    r10, r10
        adox    r9, rdi
        ; A[1] * A[1]
        mov     rdx, QWORD PTR [rax+8]
        mulx    rbx, rsi, rdx
        adcx    r11, r11
        adox    r10, rsi
        ; A[2] * A[2]
        mov     rdx, QWORD PTR [rax+16]
        mulx    rsi, rdi, rdx
        adcx    r12, r12
        adox    r11, rbx
        adcx    r13, r13
        adox    r12, rdi
        adcx    r14, r14
        ; A[3] * A[3]
        mov     rdx, QWORD PTR [rax+24]
        mulx    rbx, rdi, rdx
        adox    r13, rsi
        adcx    r15, r15
        adox    r14, rdi
        adox    r15, rbx
        ; Start Reduction
        mov     rbx, 14758798090332847183 ; 0CCD1C8AAEE00BC4Fh, reduction multiplier
        ; A[0]
        sp_256_mont_sqr_order_avx2_4_reduce r8, r9, r10, r11, r12, rbp
        ; A[1]
        sp_256_mont_sqr_order_avx2_4_reduce r9, r10, r11, r12, r13, r8
        ; A[2]
        sp_256_mont_sqr_order_avx2_4_reduce r10, r11, r12, r13, r14, r8
        ; A[3]
        sp_256_mont_sqr_order_avx2_4_reduce r11, r12, r13, r14, r15, r8
        ; r8 = final carry; turn it into a mask selecting the modulus or zero
        neg     r8
        mov     rdi, 17562291160714782033
        mov     rbx, 13611842547513532036
        mov     rbp, 18446744069414584320
        and     rdi, r8
        and     rbx, r8
        and     rbp, r8
        ; result = r12..r15 - (modulus AND mask); word 2 of the modulus is all
        ; ones, so the mask itself is subtracted there
        sub     r12, rdi
        sbb     r13, rbx
        sbb     r14, r8
        sbb     r15, rbp
        mov     QWORD PTR [rcx], r12
        mov     QWORD PTR [rcx+8], r13
        mov     QWORD PTR [rcx+16], r14
        mov     QWORD PTR [rcx+24], r15
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        ret
sp_256_mont_sqr_order_avx2_4 ENDP
_text ENDS
ENDIF
|
|
|
; /* Non-constant time modular inversion.
;  *
;  * Two passes:
;  *  1. Binary GCD on u = m and v = a.  Every step is recorded as one byte
;  *     in an on-stack log:
;  *       0 = halve u          1 = halve v
;  *       2 = u -= v, halve u  3 = v -= u, halve v
;  *     The pass stops when u or v reaches 1; al remembers which one
;  *     (1 = u, 0 = v).
;  *  2. The log is terminated with 7 and replayed on b (starts as m) and
;  *     d (starts as 1), doing each subtraction and halving modulo m.
;  *     b follows u and d follows v; the one that tracks the value which
;  *     reached 1 is stored to r.
;  *
;  * Win64: rcx = r, rdx = a, r8 = m.  r12-r15, rdi, rsi are callee-saved.
;  *
;  * The log area is 520 bytes so that rsp stays 16-byte aligned after the
;  * six pushes (the original reserved 513, leaving rsp at an odd address).
;  *
;  * NOTE(review): there is no guard for a value that never reaches 1.  If
;  * a is zero the first loop keeps halving zero and keeps appending to the
;  * log, so callers must pass an invertible a.
;  *
;  * @param  [out]  r   Resulting number.
;  * @param  [in]   a   Number to invert.
;  * @param  [in]   m   Modulus.
;  * @return  MP_OKAY on success (rax is set to 0; presumably MP_OKAY is 0 -
;  *          confirm against the C headers).
;  */
_text SEGMENT READONLY PARA
sp_256_mod_inv_4 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        sub     rsp, 520
        ; u = m in r9..r12
        mov     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [r8+16]
        mov     r12, QWORD PTR [r8+24]
        ; v = a in r13,r14,r15,rdi
        mov     r13, QWORD PTR [rdx]
        mov     r14, QWORD PTR [rdx+8]
        mov     r15, QWORD PTR [rdx+16]
        mov     rdi, QWORD PTR [rdx+24]
        mov     rsi, 0                  ; rsi = number of log entries
        test    r13b, 1
        jnz     L_256_mod_inv_4_v_even_end
        ; strip factors of two from v
L_256_mod_inv_4_v_even_start:
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shrd    r15, rdi, 1
        shr     rdi, 1
        mov     BYTE PTR [rsp+rsi], 1
        inc     rsi
        test    r13b, 1
        jz      L_256_mod_inv_4_v_even_start
L_256_mod_inv_4_v_even_end:
        ; compare u with v, most significant word first (unsigned)
L_256_mod_inv_4_uv_start:
        cmp     r12, rdi
        jb      L_256_mod_inv_4_uv_v
        ja      L_256_mod_inv_4_uv_u
        cmp     r11, r15
        jb      L_256_mod_inv_4_uv_v
        ja      L_256_mod_inv_4_uv_u
        cmp     r10, r14
        jb      L_256_mod_inv_4_uv_v
        ja      L_256_mod_inv_4_uv_u
        cmp     r9, r13
        jb      L_256_mod_inv_4_uv_v
        ; u >= v: u = (u - v) / 2
L_256_mod_inv_4_uv_u:
        mov     BYTE PTR [rsp+rsi], 2
        inc     rsi
        sub     r9, r13
        sbb     r10, r14
        sbb     r11, r15
        sbb     r12, rdi
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shr     r12, 1
        test    r9b, 1
        jnz     L_256_mod_inv_4_usubv_even_end
L_256_mod_inv_4_usubv_even_start:
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shr     r12, 1
        mov     BYTE PTR [rsp+rsi], 0
        inc     rsi
        test    r9b, 1
        jz      L_256_mod_inv_4_usubv_even_start
L_256_mod_inv_4_usubv_even_end:
        ; finished when u == 1
        cmp     r9, 1
        jne     L_256_mod_inv_4_uv_start
        mov     rdx, r10
        or      rdx, r11
        jne     L_256_mod_inv_4_uv_start
        or      rdx, r12
        jne     L_256_mod_inv_4_uv_start
        mov     al, 1                   ; result will be in b
        jmp     L_256_mod_inv_4_uv_end
        ; u < v: v = (v - u) / 2
L_256_mod_inv_4_uv_v:
        mov     BYTE PTR [rsp+rsi], 3
        inc     rsi
        sub     r13, r9
        sbb     r14, r10
        sbb     r15, r11
        sbb     rdi, r12
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shrd    r15, rdi, 1
        shr     rdi, 1
        test    r13b, 1
        jnz     L_256_mod_inv_4_vsubu_even_end
L_256_mod_inv_4_vsubu_even_start:
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shrd    r15, rdi, 1
        shr     rdi, 1
        mov     BYTE PTR [rsp+rsi], 1
        inc     rsi
        test    r13b, 1
        jz      L_256_mod_inv_4_vsubu_even_start
L_256_mod_inv_4_vsubu_even_end:
        ; finished when v == 1
        cmp     r13, 1
        jne     L_256_mod_inv_4_uv_start
        mov     rdx, r14
        or      rdx, r15
        jne     L_256_mod_inv_4_uv_start
        or      rdx, rdi
        jne     L_256_mod_inv_4_uv_start
        mov     al, 0                   ; result will be in d
L_256_mod_inv_4_uv_end:
        ; replay: b = m in r9..r12, d = 1 in r13,r14,r15,rdi
        mov     r9, QWORD PTR [r8]
        mov     r10, QWORD PTR [r8+8]
        mov     r11, QWORD PTR [r8+16]
        mov     r12, QWORD PTR [r8+24]
        mov     r13, 1
        xor     r14, r14
        xor     r15, r15
        xor     rdi, rdi
        mov     BYTE PTR [rsp+rsi], 7   ; end-of-log marker
        mov     dl, BYTE PTR [rsp]
        mov     rsi, 1                  ; rsi = index of the next log entry
        ; dispatch on the log byte: 0 div2_b, 1 div2_d, 2 b_sub_d, 3 d_sub_b
        cmp     dl, 1
        je      L_256_mod_inv_4_op_div2_d
        jl      L_256_mod_inv_4_op_div2_b
        cmp     dl, 3
        je      L_256_mod_inv_4_op_d_sub_b
        jl      L_256_mod_inv_4_op_b_sub_d
        jmp     L_256_mod_inv_4_op_end
        ; b = (b - d) mod m, then fall into the halving of b
L_256_mod_inv_4_op_b_sub_d:
        sub     r9, r13
        sbb     r10, r14
        sbb     r11, r15
        sbb     r12, rdi
        jnc     L_256_mod_inv_4_op_div2_b
        add     r9, QWORD PTR [r8]
        adc     r10, QWORD PTR [r8+8]
        adc     r11, QWORD PTR [r8+16]
        adc     r12, QWORD PTR [r8+24]
        ; b = b / 2 mod m: add m first when b is odd
L_256_mod_inv_4_op_div2_b:
        test    r9b, 1
        mov     rdx, 0                  ; mov keeps the flags from test
        jz      L_256_mod_inv_4_op_div2_b_mod
        add     r9, QWORD PTR [r8]
        adc     r10, QWORD PTR [r8+8]
        adc     r11, QWORD PTR [r8+16]
        adc     r12, QWORD PTR [r8+24]
        adc     rdx, 0                  ; rdx = bit 256 of the sum
L_256_mod_inv_4_op_div2_b_mod:
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shrd    r12, rdx, 1
        mov     dl, BYTE PTR [rsp+rsi]
        inc     rsi
        cmp     dl, 1
        je      L_256_mod_inv_4_op_div2_d
        jl      L_256_mod_inv_4_op_div2_b
        cmp     dl, 3
        je      L_256_mod_inv_4_op_d_sub_b
        jl      L_256_mod_inv_4_op_b_sub_d
        jmp     L_256_mod_inv_4_op_end
        ; d = (d - b) mod m, then fall into the halving of d
L_256_mod_inv_4_op_d_sub_b:
        sub     r13, r9
        sbb     r14, r10
        sbb     r15, r11
        sbb     rdi, r12
        jnc     L_256_mod_inv_4_op_div2_d
        add     r13, QWORD PTR [r8]
        adc     r14, QWORD PTR [r8+8]
        adc     r15, QWORD PTR [r8+16]
        adc     rdi, QWORD PTR [r8+24]
        ; d = d / 2 mod m: add m first when d is odd
L_256_mod_inv_4_op_div2_d:
        test    r13b, 1
        mov     rdx, 0                  ; mov keeps the flags from test
        jz      L_256_mod_inv_4_op_div2_d_mod
        add     r13, QWORD PTR [r8]
        adc     r14, QWORD PTR [r8+8]
        adc     r15, QWORD PTR [r8+16]
        adc     rdi, QWORD PTR [r8+24]
        adc     rdx, 0                  ; rdx = bit 256 of the sum
L_256_mod_inv_4_op_div2_d_mod:
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shrd    r15, rdi, 1
        shrd    rdi, rdx, 1
        mov     dl, BYTE PTR [rsp+rsi]
        inc     rsi
        cmp     dl, 1
        je      L_256_mod_inv_4_op_div2_d
        jl      L_256_mod_inv_4_op_div2_b
        cmp     dl, 3
        je      L_256_mod_inv_4_op_d_sub_b
        jl      L_256_mod_inv_4_op_b_sub_d
L_256_mod_inv_4_op_end:
        ; al = 1: u reached 1, answer is b; otherwise the answer is d
        cmp     al, 1
        jne     L_256_mod_inv_4_store_d
        mov     QWORD PTR [rcx], r9
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx+16], r11
        mov     QWORD PTR [rcx+24], r12
        jmp     L_256_mod_inv_4_store_end
L_256_mod_inv_4_store_d:
        mov     QWORD PTR [rcx], r13
        mov     QWORD PTR [rcx+8], r14
        mov     QWORD PTR [rcx+16], r15
        mov     QWORD PTR [rcx+24], rdi
L_256_mod_inv_4_store_end:
        xor     eax, eax                ; defined return value (0)
        add     rsp, 520
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_256_mod_inv_4 ENDP
_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_order DWORD 6497617,32001851,62711546,67108863,67043328,0,0,0,41070783,45522014,67108863,1023,4194303,0,0,0
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_order QWORD L_sp256_mod_inv_avx2_4_order
|
|
|
-_DATA ENDS
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_one QWORD 1, 0,
|
|
|
- 0, 0
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_one QWORD L_sp256_mod_inv_avx2_4_one
|
|
|
-_DATA ENDS
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_all_one DWORD 1,1,1,1,1,1,1,1
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_all_one QWORD L_sp256_mod_inv_avx2_4_all_one
|
|
|
-_DATA ENDS
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_mask01111 DWORD 0,1,1,1,1,0,0,0
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_mask01111 QWORD L_sp256_mod_inv_avx2_4_mask01111
|
|
|
-_DATA ENDS
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_down_one_dword DWORD 1,2,3,4,5,6,7,7
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_down_one_dword QWORD L_sp256_mod_inv_avx2_4_down_one_dword
|
|
|
-_DATA ENDS
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_neg DWORD 0,0,0,0,2147483648,0,0,0
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_neg QWORD L_sp256_mod_inv_avx2_4_neg
|
|
|
-_DATA ENDS
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_up_one_dword DWORD 7,0,1,2,3,7,7,7
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_up_one_dword QWORD L_sp256_mod_inv_avx2_4_up_one_dword
|
|
|
-_DATA ENDS
|
|
|
-_DATA SEGMENT
|
|
|
-ALIGN 16
|
|
|
-L_sp256_mod_inv_avx2_4_mask26 DWORD 67108863,67108863,67108863,67108863,67108863,0,0,0
|
|
|
-ptr_L_sp256_mod_inv_avx2_4_mask26 QWORD L_sp256_mod_inv_avx2_4_mask26
|
|
|
-_DATA ENDS
|
|
|
-; /* Non-constant time modular inversion (the loop branches on the operand values).
|
|
|
-; * u (rax,r9,r10,r11) starts as m and v (r12..r15) starts as a; each has a coefficient kept as 26-bit limbs in ymm0/ymm1 and ymm2/ymm3.
|
|
|
-; * @param [out] r Resulting number, 4 64-bit words, pointer in rcx.
|
|
|
-; * @param [in] a Number to invert, 4 64-bit words, pointer in rdx.
|
|
|
-; * @param [in] m Modulus, 4 64-bit words, pointer in r8. NOTE(review): the coefficients are reduced with the fixed table L_sp256_mod_inv_avx2_4_order and the immediates near the end, not with m - presumably m must equal that constant; confirm.
|
|
|
-; * @return MP_OKAY on success (original header). NOTE(review): rax is never loaded with a status; at ret it still holds the low result word - confirm against the C prototype.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_256_mod_inv_avx2_4 PROC
|
|
|
- push r12 ; save the callee-saved integer registers used below
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- sub rsp, 144 ; 9 x 16 bytes to preserve xmm6-xmm14
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- vmovdqu OWORD PTR [rsp+128], xmm14
|
|
|
- mov rax, QWORD PTR [r8] ; u = m in rax,r9,r10,r11 (low to high)
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r12, QWORD PTR [rdx] ; v = a in r12,r13,r14,r15 (low to high)
|
|
|
- mov r13, QWORD PTR [rdx+8]
|
|
|
- mov r14, QWORD PTR [rdx+16]
|
|
|
- mov r15, QWORD PTR [rdx+24]
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_order ; constant tables live outside this block
|
|
|
- vmovupd ymm6, YMMWORD PTR [rbx] ; ymm6/ymm7 = the two halves of the order table
|
|
|
- vmovupd ymm7, YMMWORD PTR [rbx+32]
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_one
|
|
|
- vmovupd ymm8, YMMWORD PTR [rbx] ; ymm8 = "one" table, also used as the odd-bit test mask
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_mask01111
|
|
|
- vmovupd ymm9, YMMWORD PTR [rbx]
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_all_one
|
|
|
- vmovupd ymm10, YMMWORD PTR [rbx]
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_down_one_dword
|
|
|
- vmovupd ymm11, YMMWORD PTR [rbx] ; ymm11 = index vector for the vpermd in the halving steps
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_neg
|
|
|
- vmovupd ymm12, YMMWORD PTR [rbx] ; ymm12 = mask tested after each coefficient subtraction
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_up_one_dword
|
|
|
- vmovupd ymm13, YMMWORD PTR [rbx]
|
|
|
- mov rbx, ptr_L_sp256_mod_inv_avx2_4_mask26
|
|
|
- vmovupd ymm14, YMMWORD PTR [rbx]
|
|
|
- vpxor xmm0, xmm0, xmm0 ; coefficient paired with u (ymm0/ymm1) = 0
|
|
|
- vpxor xmm1, xmm1, xmm1
|
|
|
- vmovdqu ymm2, ymm8 ; coefficient paired with v (ymm2/ymm3) = the "one" table
|
|
|
- vpxor xmm3, xmm3, xmm3
|
|
|
- test r12b, 1
|
|
|
- jnz L_256_mod_inv_avx2_4_v_even_end ; v already odd: skip the initial halving
|
|
|
-L_256_mod_inv_avx2_4_v_even_start:
|
|
|
- shrd r12, r13, 1 ; v >>= 1 across the four words
|
|
|
- shrd r13, r14, 1
|
|
|
- shrd r14, r15, 1
|
|
|
- shr r15, 1
|
|
|
- vptest ymm2, ymm8 ; ZF set when (ymm2 AND ymm8) is zero
|
|
|
- jz L_256_mod_inv_avx2_4_v_even_shr1
|
|
|
- vpaddd ymm2, ymm2, ymm6 ; otherwise add the order table before halving
|
|
|
- vpaddd ymm3, ymm3, ymm7
|
|
|
-L_256_mod_inv_avx2_4_v_even_shr1:
|
|
|
- vpand ymm4, ymm2, ymm9 ; halve the coefficient: masked bits are moved across
|
|
|
- vpand ymm5, ymm3, ymm10 ; limbs and re-inserted at bit 25 after the shift
|
|
|
- vpermd ymm4, ymm11, ymm4
|
|
|
- vpsrad ymm2, ymm2, 1
|
|
|
- vpsrad ymm3, ymm3, 1
|
|
|
- vpslld ymm5, ymm5, 25
|
|
|
- vpslld xmm4, xmm4, 25
|
|
|
- vpaddd ymm2, ymm2, ymm5
|
|
|
- vpaddd ymm3, ymm3, ymm4
|
|
|
- test r12b, 1
|
|
|
- jz L_256_mod_inv_avx2_4_v_even_start
|
|
|
-L_256_mod_inv_avx2_4_v_even_end:
|
|
|
-L_256_mod_inv_avx2_4_uv_start:
|
|
|
- cmp r11, r15 ; unsigned compare of u and v from the top word down
|
|
|
- jb L_256_mod_inv_avx2_4_uv_v
|
|
|
- ja L_256_mod_inv_avx2_4_uv_u
|
|
|
- cmp r10, r14
|
|
|
- jb L_256_mod_inv_avx2_4_uv_v
|
|
|
- ja L_256_mod_inv_avx2_4_uv_u
|
|
|
- cmp r9, r13
|
|
|
- jb L_256_mod_inv_avx2_4_uv_v
|
|
|
- ja L_256_mod_inv_avx2_4_uv_u
|
|
|
- cmp rax, r12
|
|
|
- jb L_256_mod_inv_avx2_4_uv_v ; u >= v falls through
|
|
|
-L_256_mod_inv_avx2_4_uv_u:
|
|
|
- sub rax, r12 ; u -= v; the vector subtracts below do not touch CF
|
|
|
- sbb r9, r13
|
|
|
- vpsubd ymm0, ymm0, ymm2
|
|
|
- sbb r10, r14
|
|
|
- vpsubd ymm1, ymm1, ymm3
|
|
|
- sbb r11, r15
|
|
|
- vptest ymm1, ymm12
|
|
|
- jz L_256_mod_inv_avx2_4_usubv_done_neg
|
|
|
- vpaddd ymm0, ymm0, ymm6 ; mask bits set: add the order table back in
|
|
|
- vpaddd ymm1, ymm1, ymm7
|
|
|
-L_256_mod_inv_avx2_4_usubv_done_neg:
|
|
|
-L_256_mod_inv_avx2_4_usubv_shr1:
|
|
|
- shrd rax, r9, 1 ; u >>= 1
|
|
|
- shrd r9, r10, 1
|
|
|
- shrd r10, r11, 1
|
|
|
- shr r11, 1
|
|
|
- vptest ymm0, ymm8
|
|
|
- jz L_256_mod_inv_avx2_4_usubv_sub_shr1
|
|
|
- vpaddd ymm0, ymm0, ymm6
|
|
|
- vpaddd ymm1, ymm1, ymm7
|
|
|
-L_256_mod_inv_avx2_4_usubv_sub_shr1:
|
|
|
- vpand ymm4, ymm0, ymm9 ; same halving sequence as above, applied to ymm0/ymm1
|
|
|
- vpand ymm5, ymm1, ymm10
|
|
|
- vpermd ymm4, ymm11, ymm4
|
|
|
- vpsrad ymm0, ymm0, 1
|
|
|
- vpsrad ymm1, ymm1, 1
|
|
|
- vpslld ymm5, ymm5, 25
|
|
|
- vpslld xmm4, xmm4, 25
|
|
|
- vpaddd ymm0, ymm0, ymm5
|
|
|
- vpaddd ymm1, ymm1, ymm4
|
|
|
- test al, 1
|
|
|
- jz L_256_mod_inv_avx2_4_usubv_shr1 ; keep halving while u is even
|
|
|
- cmp rax, 1 ; finished only when u == 1 (upper words all zero)
|
|
|
- jne L_256_mod_inv_avx2_4_uv_start
|
|
|
- mov rdx, r9
|
|
|
- or rdx, r10
|
|
|
- jne L_256_mod_inv_avx2_4_uv_start
|
|
|
- or rdx, r11
|
|
|
- jne L_256_mod_inv_avx2_4_uv_start
|
|
|
- vpextrd eax, xmm0, 0 ; unpack the ten limbs of ymm0/ymm1 into integer registers
|
|
|
- vpextrd r10d, xmm0, 1
|
|
|
- vpextrd r12d, xmm0, 2
|
|
|
- vpextrd r14d, xmm0, 3
|
|
|
- vpextrd r9d, xmm1, 0
|
|
|
- vpextrd r11d, xmm1, 1
|
|
|
- vpextrd r13d, xmm1, 2
|
|
|
- vpextrd r15d, xmm1, 3
|
|
|
- vextracti128 xmm0, ymm0, 1
|
|
|
- vextracti128 xmm1, ymm1, 1
|
|
|
- vpextrd edi, xmm0, 0
|
|
|
- vpextrd esi, xmm1, 0
|
|
|
- jmp L_256_mod_inv_avx2_4_store_done
|
|
|
-L_256_mod_inv_avx2_4_uv_v:
|
|
|
- sub r12, rax ; v -= u, mirrored version of the u branch
|
|
|
- sbb r13, r9
|
|
|
- vpsubd ymm2, ymm2, ymm0
|
|
|
- sbb r14, r10
|
|
|
- vpsubd ymm3, ymm3, ymm1
|
|
|
- sbb r15, r11
|
|
|
- vptest ymm3, ymm12
|
|
|
- jz L_256_mod_inv_avx2_4_vsubu_done_neg
|
|
|
- vpaddd ymm2, ymm2, ymm6
|
|
|
- vpaddd ymm3, ymm3, ymm7
|
|
|
-L_256_mod_inv_avx2_4_vsubu_done_neg:
|
|
|
-L_256_mod_inv_avx2_4_vsubu_shr1:
|
|
|
- shrd r12, r13, 1 ; v >>= 1
|
|
|
- shrd r13, r14, 1
|
|
|
- shrd r14, r15, 1
|
|
|
- shr r15, 1
|
|
|
- vptest ymm2, ymm8
|
|
|
- jz L_256_mod_inv_avx2_4_vsubu_sub_shr1
|
|
|
- vpaddd ymm2, ymm2, ymm6
|
|
|
- vpaddd ymm3, ymm3, ymm7
|
|
|
-L_256_mod_inv_avx2_4_vsubu_sub_shr1:
|
|
|
- vpand ymm4, ymm2, ymm9
|
|
|
- vpand ymm5, ymm3, ymm10
|
|
|
- vpermd ymm4, ymm11, ymm4
|
|
|
- vpsrad ymm2, ymm2, 1
|
|
|
- vpsrad ymm3, ymm3, 1
|
|
|
- vpslld ymm5, ymm5, 25
|
|
|
- vpslld xmm4, xmm4, 25
|
|
|
- vpaddd ymm2, ymm2, ymm5
|
|
|
- vpaddd ymm3, ymm3, ymm4
|
|
|
- test r12b, 1
|
|
|
- jz L_256_mod_inv_avx2_4_vsubu_shr1 ; keep halving while v is even
|
|
|
- cmp r12, 1 ; finished only when v == 1 (upper words all zero)
|
|
|
- jne L_256_mod_inv_avx2_4_uv_start
|
|
|
- mov rdx, r13
|
|
|
- or rdx, r14
|
|
|
- jne L_256_mod_inv_avx2_4_uv_start
|
|
|
- or rdx, r15
|
|
|
- jne L_256_mod_inv_avx2_4_uv_start
|
|
|
- vpextrd eax, xmm2, 0 ; unpack the ten limbs of ymm2/ymm3 into integer registers
|
|
|
- vpextrd r10d, xmm2, 1
|
|
|
- vpextrd r12d, xmm2, 2
|
|
|
- vpextrd r14d, xmm2, 3
|
|
|
- vpextrd r9d, xmm3, 0
|
|
|
- vpextrd r11d, xmm3, 1
|
|
|
- vpextrd r13d, xmm3, 2
|
|
|
- vpextrd r15d, xmm3, 3
|
|
|
- vextracti128 xmm2, ymm2, 1
|
|
|
- vextracti128 xmm3, ymm3, 1
|
|
|
- vpextrd edi, xmm2, 0
|
|
|
- vpextrd esi, xmm3, 0
|
|
|
-L_256_mod_inv_avx2_4_store_done:
|
|
|
- mov edx, eax ; signed carry propagation through the 26-bit limbs
|
|
|
- and eax, 67108863 ; 67108863 = 2^26 - 1
|
|
|
- sar edx, 26
|
|
|
- add r9d, edx
|
|
|
- mov edx, r9d
|
|
|
- and r9d, 67108863
|
|
|
- sar edx, 26
|
|
|
- add r10d, edx
|
|
|
- mov edx, r10d
|
|
|
- and r10d, 67108863
|
|
|
- sar edx, 26
|
|
|
- add r11d, edx
|
|
|
- mov edx, r11d
|
|
|
- and r11d, 67108863
|
|
|
- sar edx, 26
|
|
|
- add r12d, edx
|
|
|
- mov edx, r12d
|
|
|
- and r12d, 67108863
|
|
|
- sar edx, 26
|
|
|
- add r13d, edx
|
|
|
- mov edx, r13d
|
|
|
- and r13d, 67108863
|
|
|
- sar edx, 26
|
|
|
- add r14d, edx
|
|
|
- mov edx, r14d
|
|
|
- and r14d, 67108863
|
|
|
- sar edx, 26
|
|
|
- add r15d, edx
|
|
|
- mov edx, r15d
|
|
|
- and r15d, 67108863
|
|
|
- sar edx, 26
|
|
|
- add edi, edx
|
|
|
- mov edx, edi
|
|
|
- and edi, 67108863
|
|
|
- sar edx, 26
|
|
|
- add esi, edx ; top limb keeps its sign
|
|
|
- movsxd r9, r9d ; sign-extend the odd limbs, then shift them up 26 bits
|
|
|
- movsxd r11, r11d
|
|
|
- movsxd r13, r13d
|
|
|
- movsxd r15, r15d
|
|
|
- movsxd rsi, esi
|
|
|
- shl r9, 26
|
|
|
- shl r11, 26
|
|
|
- shl r13, 26
|
|
|
- shl r15, 26
|
|
|
- shl rsi, 26
|
|
|
- movsxd rax, eax ; merge limb pairs into five wider limbs
|
|
|
- add rax, r9
|
|
|
- movsxd r10, r10d
|
|
|
- adc r10, r11
|
|
|
- movsxd r12, r12d
|
|
|
- adc r12, r13
|
|
|
- movsxd r14, r14d
|
|
|
- adc r14, r15
|
|
|
- movsxd rdi, edi
|
|
|
- adc rdi, rsi
|
|
|
- jge L_256_mod_inv_avx2_4_3_no_add_order ; signed test on the flags of the top-limb adc
|
|
|
- mov r9, 2756213597218129 ; otherwise add these five constants limb by limb
|
|
|
- mov r11, 3054930678533947 ; (presumably the order in 52-bit limbs - confirm)
|
|
|
- mov r13, 4503599622973178
|
|
|
- mov r15, 68719476735
|
|
|
- mov rsi, 281474976645120
|
|
|
- add rax, r9
|
|
|
- add r10, r11
|
|
|
- add r12, r13
|
|
|
- add r14, r15
|
|
|
- add rdi, rsi
|
|
|
- mov rdx, 4503599627370495 ; 2^52 - 1
|
|
|
- mov r9, rax ; signed carry propagation through the 52-bit limbs
|
|
|
- and rax, rdx
|
|
|
- sar r9, 52
|
|
|
- add r10, r9
|
|
|
- mov r11, r10
|
|
|
- and r10, rdx
|
|
|
- sar r11, 52
|
|
|
- add r12, r11
|
|
|
- mov r13, r12
|
|
|
- and r12, rdx
|
|
|
- sar r13, 52
|
|
|
- add r14, r13
|
|
|
- mov r15, r14
|
|
|
- and r14, rdx
|
|
|
- sar r15, 52
|
|
|
- add rdi, r15
|
|
|
-L_256_mod_inv_avx2_4_3_no_add_order:
|
|
|
- mov r9, r10 ; repack the five limbs into four 64-bit words
|
|
|
- mov r11, r12
|
|
|
- mov r13, r14
|
|
|
- shl r9, 52
|
|
|
- sar r10, 12
|
|
|
- shl r11, 40
|
|
|
- sar r12, 24
|
|
|
- shl r13, 28
|
|
|
- sar r14, 36
|
|
|
- shl rdi, 16
|
|
|
- add rax, r9
|
|
|
- adc r10, r11
|
|
|
- adc r12, r13
|
|
|
- adc r14, rdi
|
|
|
- mov QWORD PTR [rcx], rax ; write the result to r
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], r14
|
|
|
- vzeroupper ; clear the dirty upper ymm halves so non-VEX SSE code in the caller pays no transition penalty
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the preserved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- vmovdqu xmm14, OWORD PTR [rsp+128]
|
|
|
- add rsp, 144
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_256_mod_inv_avx2_4 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFDEF WOLFSSL_SP_384
|
|
|
-; /* Multiply a and b into r. (r = a * b) Column-by-column 6x6-word multiply giving 12 words.
|
|
|
-; * Product words 0-5 are staged on the stack and copied to r at the end; words 6-11 are written to r directly.
|
|
|
-; * r Result, 12 64-bit words, pointer in rcx.
|
|
|
-; * a A single precision integer, 6 64-bit words, pointer in rdx (moved to r9 because mul overwrites rdx).
|
|
|
-; * b A single precision integer, 6 64-bit words, pointer in r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_mul_6 PROC
|
|
|
- push r12 ; r12 is one of the three rotating accumulators (r10, r11, r12)
|
|
|
- mov r9, rdx ; r9 = a; rdx receives the high half of every mul
|
|
|
- sub rsp, 48 ; 6-word staging area for product words 0-5
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- mov QWORD PTR [rsp], rax ; product word 0
|
|
|
- mov r11, rdx
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+8], r11 ; product word 1
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+16], r12 ; product word 2
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+24], r10 ; product word 3
|
|
|
- ; A[0] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+32], r11 ; product word 4
|
|
|
- ; A[0] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+40], r12 ; product word 5 (last staged word)
|
|
|
- ; A[1] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+48], r10 ; product word 6, written straight to r
|
|
|
- ; A[2] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+56], r11 ; product word 7
|
|
|
- ; A[3] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+64], r12 ; product word 8
|
|
|
- ; A[4] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+72], r10 ; product word 9
|
|
|
- ; A[5] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- mov QWORD PTR [rcx+80], r11 ; product words 10 and 11
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- mov rax, QWORD PTR [rsp] ; all operand reads are done: copy staged words 0-5 into r
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- add rsp, 48
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_mul_6 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b) Row-by-row 6x6-word multiply using mulx with adcx/adox carry chains.
|
|
|
-; * Product words 0-4 are staged on the stack and copied to r at the end; words 5-11 are written to r directly.
|
|
|
-; * r Result of multiplication, 12 64-bit words, pointer in rcx.
|
|
|
-; * a First number to multiply, 6 64-bit words, pointer in rdx (moved to rax; rdx is the implicit mulx operand).
|
|
|
-; * b Second number to multiply, 6 64-bit words, pointer in r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_mul_avx2_6 PROC
|
|
|
- push r12 ; save the callee-saved registers used as accumulators
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov rax, rdx ; rax = a
|
|
|
- sub rsp, 40 ; 5-word staging area for product words 0-4
|
|
|
- xor rbx, rbx ; rbx = 0 and CF/OF cleared before the first carry chain
|
|
|
- mov rdx, QWORD PTR [rax] ; rdx = A[0]
|
|
|
- ; A[0] * B[0]
|
|
|
- mulx r12, r11, QWORD PTR [r8] ; first destination gets the high half, second the low half
|
|
|
- ; A[0] * B[1]
|
|
|
- mulx r13, r9, QWORD PTR [r8+8]
|
|
|
- adcx r12, r9
|
|
|
- ; A[0] * B[2]
|
|
|
- mulx r14, r9, QWORD PTR [r8+16]
|
|
|
- adcx r13, r9
|
|
|
- ; A[0] * B[3]
|
|
|
- mulx r15, r9, QWORD PTR [r8+24]
|
|
|
- adcx r14, r9
|
|
|
- ; A[0] * B[4]
|
|
|
- mulx rdi, r9, QWORD PTR [r8+32]
|
|
|
- adcx r15, r9
|
|
|
- ; A[0] * B[5]
|
|
|
- mulx rsi, r9, QWORD PTR [r8+40]
|
|
|
- adcx rdi, r9
|
|
|
- adcx rsi, rbx
|
|
|
- mov QWORD PTR [rsp], r11 ; product word 0
|
|
|
- mov r11, 0 ; mov keeps the flags; r11 is recycled as the next top word
|
|
|
- adcx r11, rbx
|
|
|
- xor rbx, rbx ; reset CF and OF for the next row
|
|
|
- mov rdx, QWORD PTR [rax+8] ; rdx = A[1]
|
|
|
- ; A[1] * B[0]
|
|
|
- mulx r10, r9, QWORD PTR [r8]
|
|
|
- adcx r12, r9 ; low halves ride the CF chain
|
|
|
- adox r13, r10 ; high halves ride the OF chain
|
|
|
- ; A[1] * B[1]
|
|
|
- mulx r10, r9, QWORD PTR [r8+8]
|
|
|
- adcx r13, r9
|
|
|
- adox r14, r10
|
|
|
- ; A[1] * B[2]
|
|
|
- mulx r10, r9, QWORD PTR [r8+16]
|
|
|
- adcx r14, r9
|
|
|
- adox r15, r10
|
|
|
- ; A[1] * B[3]
|
|
|
- mulx r10, r9, QWORD PTR [r8+24]
|
|
|
- adcx r15, r9
|
|
|
- adox rdi, r10
|
|
|
- ; A[1] * B[4]
|
|
|
- mulx r10, r9, QWORD PTR [r8+32]
|
|
|
- adcx rdi, r9
|
|
|
- adox rsi, r10
|
|
|
- ; A[1] * B[5]
|
|
|
- mulx r10, r9, QWORD PTR [r8+40]
|
|
|
- adcx rsi, r9
|
|
|
- adox r11, r10
|
|
|
- adcx r11, rbx
|
|
|
- mov QWORD PTR [rsp+8], r12 ; product word 1
|
|
|
- mov r12, 0
|
|
|
- adcx r12, rbx ; collect both pending carries into the new top word
|
|
|
- adox r12, rbx
|
|
|
- xor rbx, rbx
|
|
|
- mov rdx, QWORD PTR [rax+16] ; rdx = A[2]
|
|
|
- ; A[2] * B[0]
|
|
|
- mulx r10, r9, QWORD PTR [r8]
|
|
|
- adcx r13, r9
|
|
|
- adox r14, r10
|
|
|
- ; A[2] * B[1]
|
|
|
- mulx r10, r9, QWORD PTR [r8+8]
|
|
|
- adcx r14, r9
|
|
|
- adox r15, r10
|
|
|
- ; A[2] * B[2]
|
|
|
- mulx r10, r9, QWORD PTR [r8+16]
|
|
|
- adcx r15, r9
|
|
|
- adox rdi, r10
|
|
|
- ; A[2] * B[3]
|
|
|
- mulx r10, r9, QWORD PTR [r8+24]
|
|
|
- adcx rdi, r9
|
|
|
- adox rsi, r10
|
|
|
- ; A[2] * B[4]
|
|
|
- mulx r10, r9, QWORD PTR [r8+32]
|
|
|
- adcx rsi, r9
|
|
|
- adox r11, r10
|
|
|
- ; A[2] * B[5]
|
|
|
- mulx r10, r9, QWORD PTR [r8+40]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- adcx r12, rbx
|
|
|
- mov QWORD PTR [rsp+16], r13 ; product word 2
|
|
|
- mov r13, 0
|
|
|
- adcx r13, rbx
|
|
|
- adox r13, rbx
|
|
|
- xor rbx, rbx
|
|
|
- mov rdx, QWORD PTR [rax+24] ; rdx = A[3]
|
|
|
- ; A[3] * B[0]
|
|
|
- mulx r10, r9, QWORD PTR [r8]
|
|
|
- adcx r14, r9
|
|
|
- adox r15, r10
|
|
|
- ; A[3] * B[1]
|
|
|
- mulx r10, r9, QWORD PTR [r8+8]
|
|
|
- adcx r15, r9
|
|
|
- adox rdi, r10
|
|
|
- ; A[3] * B[2]
|
|
|
- mulx r10, r9, QWORD PTR [r8+16]
|
|
|
- adcx rdi, r9
|
|
|
- adox rsi, r10
|
|
|
- ; A[3] * B[3]
|
|
|
- mulx r10, r9, QWORD PTR [r8+24]
|
|
|
- adcx rsi, r9
|
|
|
- adox r11, r10
|
|
|
- ; A[3] * B[4]
|
|
|
- mulx r10, r9, QWORD PTR [r8+32]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- ; A[3] * B[5]
|
|
|
- mulx r10, r9, QWORD PTR [r8+40]
|
|
|
- adcx r12, r9
|
|
|
- adox r13, r10
|
|
|
- adcx r13, rbx
|
|
|
- mov QWORD PTR [rsp+24], r14 ; product word 3
|
|
|
- mov r14, 0
|
|
|
- adcx r14, rbx
|
|
|
- adox r14, rbx
|
|
|
- xor rbx, rbx
|
|
|
- mov rdx, QWORD PTR [rax+32] ; rdx = A[4]
|
|
|
- ; A[4] * B[0]
|
|
|
- mulx r10, r9, QWORD PTR [r8]
|
|
|
- adcx r15, r9
|
|
|
- adox rdi, r10
|
|
|
- ; A[4] * B[1]
|
|
|
- mulx r10, r9, QWORD PTR [r8+8]
|
|
|
- adcx rdi, r9
|
|
|
- adox rsi, r10
|
|
|
- ; A[4] * B[2]
|
|
|
- mulx r10, r9, QWORD PTR [r8+16]
|
|
|
- adcx rsi, r9
|
|
|
- adox r11, r10
|
|
|
- ; A[4] * B[3]
|
|
|
- mulx r10, r9, QWORD PTR [r8+24]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- ; A[4] * B[4]
|
|
|
- mulx r10, r9, QWORD PTR [r8+32]
|
|
|
- adcx r12, r9
|
|
|
- adox r13, r10
|
|
|
- ; A[4] * B[5]
|
|
|
- mulx r10, r9, QWORD PTR [r8+40]
|
|
|
- adcx r13, r9
|
|
|
- adox r14, r10
|
|
|
- adcx r14, rbx
|
|
|
- mov QWORD PTR [rsp+32], r15 ; product word 4 (last staged word)
|
|
|
- mov rdx, QWORD PTR [rax+40] ; rdx = A[5]; NOTE(review): no flag reset before this row, it continues from the adox/adcx on r14 above - presumably both carries are 0 there, confirm
|
|
|
- ; A[5] * B[0]
|
|
|
- mulx r10, r9, QWORD PTR [r8]
|
|
|
- adcx rdi, r9
|
|
|
- adox rsi, r10
|
|
|
- ; A[5] * B[1]
|
|
|
- mulx r10, r9, QWORD PTR [r8+8]
|
|
|
- adcx rsi, r9
|
|
|
- adox r11, r10
|
|
|
- ; A[5] * B[2]
|
|
|
- mulx r10, r9, QWORD PTR [r8+16]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- ; A[5] * B[3]
|
|
|
- mulx r10, r9, QWORD PTR [r8+24]
|
|
|
- adcx r12, r9
|
|
|
- adox r13, r10
|
|
|
- ; A[5] * B[4]
|
|
|
- mulx r10, r9, QWORD PTR [r8+32]
|
|
|
- adcx r13, r9
|
|
|
- adox r14, r10
|
|
|
- ; A[5] * B[5]
|
|
|
- mulx r15, r9, QWORD PTR [r8+40] ; r15 (already staged) becomes the top word
|
|
|
- adcx r14, r9
|
|
|
- adox r15, rbx
|
|
|
- adcx r15, rbx
|
|
|
- mov QWORD PTR [rcx+40], rdi ; product words 5-11 go straight to r
|
|
|
- mov QWORD PTR [rcx+48], rsi
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- mov QWORD PTR [rcx+72], r13
|
|
|
- mov QWORD PTR [rcx+80], r14
|
|
|
- mov QWORD PTR [rcx+88], r15
|
|
|
- mov r11, QWORD PTR [rsp] ; copy staged words 0-4 into r
|
|
|
- mov r12, QWORD PTR [rsp+8]
|
|
|
- mov r13, QWORD PTR [rsp+16]
|
|
|
- mov r14, QWORD PTR [rsp+24]
|
|
|
- mov r15, QWORD PTR [rsp+32]
|
|
|
- mov QWORD PTR [rcx], r11
|
|
|
- mov QWORD PTR [rcx+8], r12
|
|
|
- mov QWORD PTR [rcx+16], r13
|
|
|
- mov QWORD PTR [rcx+24], r14
|
|
|
- mov QWORD PTR [rcx+32], r15
|
|
|
- add rsp, 40
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_mul_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a) Column-by-column; each off-diagonal product is counted twice.
|
|
|
-; * Result words 0-5 are staged on the stack and copied to r at the end; words 6-11 are written to r directly.
|
|
|
-; * r Result, 12 64-bit words, pointer in rcx.
|
|
|
-; * a A single precision integer, 6 64-bit words, pointer in rdx (moved to r8 because mul overwrites rdx).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_sqr_6 PROC
|
|
|
- push r12 ; r12-r14 are used for the column-5 partial sum and the final copy
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- mov r8, rdx ; r8 = a
|
|
|
- sub rsp, 48 ; 6-word staging area for result words 0-5
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax
|
|
|
- xor r11, r11
|
|
|
- mov QWORD PTR [rsp], rax ; result word 0
|
|
|
- mov r10, rdx
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax ; off-diagonal product is accumulated twice
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+8], r10 ; result word 1
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r11, rax ; diagonal product is accumulated once
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+16], r11 ; result word 2
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+24], r9 ; result word 3
|
|
|
- ; A[0] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+32], r10 ; result word 4
|
|
|
- ; A[0] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax ; column 5: sum the three products in r12:r13:r14 first
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12 ; double the partial sum once
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12 ; then fold it into the running accumulators
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+40], r11 ; result word 5 (last staged word)
|
|
|
- ; A[1] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+48], r9 ; result word 6, written straight to r
|
|
|
- ; A[2] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[3] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rcx+56], r10 ; result word 7
|
|
|
- ; A[3] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+64], r11 ; result word 8
|
|
|
- ; A[4] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+72], r9 ; result word 9
|
|
|
- ; A[5] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- mov QWORD PTR [rcx+80], r10 ; result words 10 and 11
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rsp] ; all operand reads are done: copy staged words 0-5 into r
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r12, QWORD PTR [rsp+16]
|
|
|
- mov r13, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], r13
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- add rsp, 48
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_sqr_6 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a) Sums the off-diagonal products once, then doubles them while adding the squares A[i]*A[i].
|
|
|
-; * The r pointer arrives in rcx and is kept on the stack, because rcx serves as the zero/carry register.
|
|
|
-; * r Result of squaring, 12 64-bit words, pointer in rcx.
|
|
|
-; * a Number to square in Montgomery form, 6 64-bit words, pointer in rdx (moved to rax; rdx is the implicit mulx operand).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_sqr_avx2_6 PROC
|
|
|
- push r12 ; save the callee-saved registers used as accumulators
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov rax, rdx ; rax = a
|
|
|
- push rcx ; park the r pointer on the stack
|
|
|
- xor rcx, rcx ; rcx = 0 and CF/OF cleared
|
|
|
- mov rdx, QWORD PTR [rax] ; rdx = A[0]
|
|
|
- mov rsi, QWORD PTR [rax+8] ; rsi = A[1]
|
|
|
- mov rbx, QWORD PTR [rax+16] ; rbx = A[2]
|
|
|
- mov rbp, QWORD PTR [rax+24] ; rbp = A[3]
|
|
|
- ; Diagonal 0
|
|
|
- ; A[1] * A[0]
|
|
|
- mulx r11, r10, QWORD PTR [rax+8] ; first destination gets the high half, second the low half
|
|
|
- ; A[2] * A[0]
|
|
|
- mulx r12, r8, QWORD PTR [rax+16]
|
|
|
- adcx r11, r8
|
|
|
- ; A[3] * A[0]
|
|
|
- mulx r13, r8, QWORD PTR [rax+24]
|
|
|
- adcx r12, r8
|
|
|
- ; A[4] * A[0]
|
|
|
- mulx r14, r8, QWORD PTR [rax+32]
|
|
|
- adcx r13, r8
|
|
|
- ; A[5] * A[0]
|
|
|
- mulx r15, r8, QWORD PTR [rax+40]
|
|
|
- adcx r14, r8
|
|
|
- adcx r15, rcx
|
|
|
- ; Diagonal 1
|
|
|
- mov rdx, rsi ; rdx = A[1]
|
|
|
- ; A[2] * A[1]
|
|
|
- mulx r9, r8, QWORD PTR [rax+16]
|
|
|
- adcx r12, r8 ; low halves ride the CF chain
|
|
|
- adox r13, r9 ; high halves ride the OF chain
|
|
|
- ; A[3] * A[1]
|
|
|
- mulx r9, r8, QWORD PTR [rax+24]
|
|
|
- adcx r13, r8
|
|
|
- adox r14, r9
|
|
|
- ; A[4] * A[1]
|
|
|
- mulx r9, r8, QWORD PTR [rax+32]
|
|
|
- adcx r14, r8
|
|
|
- adox r15, r9
|
|
|
- ; A[5] * A[1]
|
|
|
- mulx rdi, r8, QWORD PTR [rax+40]
|
|
|
- adcx r15, r8
|
|
|
- adox rdi, rcx
|
|
|
- mov rdx, rbx ; rdx = A[2]
|
|
|
- ; A[5] * A[2]
|
|
|
- mulx rsi, r8, QWORD PTR [rax+40] ; rsi (the A[1] copy) is reused as an accumulator from here
|
|
|
- adcx rdi, r8
|
|
|
- adox rsi, rcx
|
|
|
- adcx rsi, rcx
|
|
|
- adcx rbx, rcx ; rbx is rewritten by the A[5]*A[3] mulx below before any further read
|
|
|
- ; Diagonal 2
|
|
|
- ; A[3] * A[2]
|
|
|
- mulx r9, r8, QWORD PTR [rax+24]
|
|
|
- adcx r14, r8
|
|
|
- adox r15, r9
|
|
|
- ; A[4] * A[2]
|
|
|
- mulx r9, r8, QWORD PTR [rax+32]
|
|
|
- adcx r15, r8
|
|
|
- adox rdi, r9
|
|
|
- mov rdx, rbp ; rdx = A[3]
|
|
|
- ; A[4] * A[3]
|
|
|
- mulx r9, r8, QWORD PTR [rax+32]
|
|
|
- adcx rdi, r8
|
|
|
- adox rsi, r9
|
|
|
- ; A[5] * A[3]
|
|
|
- mulx rbx, r8, QWORD PTR [rax+40]
|
|
|
- adcx rsi, r8
|
|
|
- adox rbx, rcx
|
|
|
- mov rdx, QWORD PTR [rax+32] ; rdx = A[4]
|
|
|
- ; A[5] * A[4]
|
|
|
- mulx rbp, r8, QWORD PTR [rax+40]
|
|
|
- adcx rbx, r8
|
|
|
- adox rbp, rcx
|
|
|
- adcx rbp, rcx
|
|
|
- adcx rcx, rcx ; rcx = carry out of the off-diagonal sum
|
|
|
- ; Doubling previous result as we add in square words results
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rdx, QWORD PTR [rax]
|
|
|
- mulx r9, r8, rdx
|
|
|
- pop rdx ; fetch the r pointer (pop/mov/push leave the flags alone)
|
|
|
- mov QWORD PTR [rdx], r8 ; result word 0
|
|
|
- adox r10, r10 ; doubling runs on the OF chain
|
|
|
- push rdx ; park the r pointer again
|
|
|
- adcx r10, r9 ; square words are added on the CF chain
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rdx, QWORD PTR [rax+8]
|
|
|
- mulx r9, r8, rdx
|
|
|
- adox r11, r11
|
|
|
- adcx r11, r8
|
|
|
- adox r12, r12
|
|
|
- adcx r12, r9
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rdx, QWORD PTR [rax+16]
|
|
|
- mulx r9, r8, rdx
|
|
|
- adox r13, r13
|
|
|
- adcx r13, r8
|
|
|
- adox r14, r14
|
|
|
- adcx r14, r9
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rdx, QWORD PTR [rax+24]
|
|
|
- mulx r9, r8, rdx
|
|
|
- adox r15, r15
|
|
|
- adcx r15, r8
|
|
|
- adox rdi, rdi
|
|
|
- adcx rdi, r9
|
|
|
- ; A[4] * A[4]
|
|
|
- mov rdx, QWORD PTR [rax+32]
|
|
|
- mulx r9, r8, rdx
|
|
|
- adox rsi, rsi
|
|
|
- adcx rsi, r8
|
|
|
- adox rbx, rbx
|
|
|
- adcx rbx, r9
|
|
|
- ; A[5] * A[5]
|
|
|
- mov rdx, QWORD PTR [rax+40]
|
|
|
- mulx r9, r8, rdx
|
|
|
- adox rbp, rbp
|
|
|
- adcx rbp, r8
|
|
|
- adcx r9, rcx ; top word: high half of A[5]^2 + saved carry + CF
|
|
|
- mov r8, 0 ; mov keeps the flags
|
|
|
- adox r9, r8 ; plus the last OF carry from the doubling chain
|
|
|
- pop rcx ; rcx = r pointer again
|
|
|
- mov QWORD PTR [rcx+8], r10 ; result words 1-11
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- mov QWORD PTR [rcx+32], r13
|
|
|
- mov QWORD PTR [rcx+40], r14
|
|
|
- mov QWORD PTR [rcx+48], r15
|
|
|
- mov QWORD PTR [rcx+56], rdi
|
|
|
- mov QWORD PTR [rcx+64], rsi
|
|
|
- mov QWORD PTR [rcx+72], rbx
|
|
|
- mov QWORD PTR [rcx+80], rbp
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_sqr_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Add two 6-word numbers. (r = a + b)
|
|
|
-; *
|
|
|
-; * r Sum, 6 64-bit words, pointer in rcx.
|
|
|
-; * a First addend, 6 64-bit words, pointer in rdx.
|
|
|
-; * b Second addend, 6 64-bit words, pointer in r8.
|
|
|
-; * rax returns the carry out of the top word (0 or 1).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_add_6 PROC
|
|
|
- push r12
|
|
|
- ; Read every word of a before anything is written; rdx itself takes the last word.
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- mov rdx, QWORD PTR [rdx+40]
|
|
|
- ; Add b, rippling the carry from word 0 up to word 5.
|
|
|
- add rax, QWORD PTR [r8]
|
|
|
- adc r9, QWORD PTR [r8+8]
|
|
|
- adc r10, QWORD PTR [r8+16]
|
|
|
- adc r11, QWORD PTR [r8+24]
|
|
|
- adc r12, QWORD PTR [r8+32]
|
|
|
- adc rdx, QWORD PTR [r8+40]
|
|
|
- ; mov does not touch CF, so the final carry survives the stores.
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov QWORD PTR [rcx+32], r12
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- ; rax = 0 + 0 + CF
|
|
|
- mov eax, 0
|
|
|
- adc eax, eax
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_add_6 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Subtract one 6-word number from another. (r = a - b)
|
|
|
-; *
|
|
|
-; * r Difference, 6 64-bit words, pointer in rcx.
|
|
|
-; * a Minuend, 6 64-bit words, pointer in rdx.
|
|
|
-; * b Subtrahend, 6 64-bit words, pointer in r8.
|
|
|
-; * rax returns 0 when there is no borrow out of the top word and all ones (-1) when there is.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_sub_6 PROC
|
|
|
- push r12
|
|
|
- ; Read every word of a before anything is written; rdx itself takes the last word.
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- mov rdx, QWORD PTR [rdx+40]
|
|
|
- ; Subtract b, rippling the borrow from word 0 up to word 5.
|
|
|
- sub rax, QWORD PTR [r8]
|
|
|
- sbb r9, QWORD PTR [r8+8]
|
|
|
- sbb r10, QWORD PTR [r8+16]
|
|
|
- sbb r11, QWORD PTR [r8+24]
|
|
|
- sbb r12, QWORD PTR [r8+32]
|
|
|
- sbb rdx, QWORD PTR [r8+40]
|
|
|
- ; mov does not touch CF, so the final borrow survives the stores.
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov QWORD PTR [rcx+32], r12
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- ; rax - rax - CF: 0 without a borrow, -1 with one
|
|
|
- sbb rax, rax
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_sub_6 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Branch-free conditional copy of a into r, selected by the mask m.
|
|
|
-; * Each word becomes r ^ ((a ^ r) & m): m = -1 copies a, m = 0 leaves r as it was.
|
|
|
-; *
|
|
|
-; * r Number to copy over, 6 64-bit words, pointer in rcx.
|
|
|
-; * a Number to copy, 6 64-bit words, pointer in rdx.
|
|
|
-; * m Mask value to apply, in r8.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_cond_copy_6 PROC
|
|
|
- push r12
|
|
|
- ; Fetch a; rdx itself takes the last word.
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- mov rdx, QWORD PTR [rdx+40]
|
|
|
- ; Difference between a and the current r.
|
|
|
- xor rax, QWORD PTR [rcx]
|
|
|
- xor r9, QWORD PTR [rcx+8]
|
|
|
- xor r10, QWORD PTR [rcx+16]
|
|
|
- xor r11, QWORD PTR [rcx+24]
|
|
|
- xor r12, QWORD PTR [rcx+32]
|
|
|
- xor rdx, QWORD PTR [rcx+40]
|
|
|
- ; Keep the difference only where the mask is set.
|
|
|
- and rax, r8
|
|
|
- and r9, r8
|
|
|
- and r10, r8
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- and rdx, r8
|
|
|
- ; Fold the masked difference back into r.
|
|
|
- xor QWORD PTR [rcx], rax
|
|
|
- xor QWORD PTR [rcx+8], r9
|
|
|
- xor QWORD PTR [rcx+16], r10
|
|
|
- xor QWORD PTR [rcx+24], r11
|
|
|
- xor QWORD PTR [rcx+32], r12
|
|
|
- xor QWORD PTR [rcx+40], rdx
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_cond_copy_6 ENDP
|
|
|
-_text ENDS
|
|
|
; /* Masked subtraction: r = a - (b & m).
; * m is -1 to subtract b and 0 to subtract nothing.
; *
; * r  (rcx)  Six-word result.
; * a  (rdx)  Six-word number to subtract from.
; * b  (r8)   Six-word number to subtract.
; * m  (r9)   Mask value applied to every word of b.
; *
; * Returns rax = 0 when there was no final borrow, otherwise -1.
; */
_text SEGMENT READONLY PARA
sp_384_cond_sub_6 PROC
        sub     rsp, 48
        ; Stage b & m in six stack words first: and rewrites the carry flag,
        ; so the masking cannot be mixed into the borrow chain below.
        FOR off, <0, 8, 16, 24, 32, 40>
        mov     r10, QWORD PTR [r8+off]
        and     r10, r9
        mov     QWORD PTR [rsp+off], r10
        ENDM
        ; Borrow chain. Each store of r[i] trails the load of a[i+1], and
        ; mov does not disturb the borrow held in the carry flag.
        mov     r10, QWORD PTR [rdx]
        sub     r10, QWORD PTR [rsp]
        mov     r11, QWORD PTR [rdx+8]
        sbb     r11, QWORD PTR [rsp+8]
        mov     QWORD PTR [rcx], r10
        mov     r10, QWORD PTR [rdx+16]
        sbb     r10, QWORD PTR [rsp+16]
        mov     QWORD PTR [rcx+8], r11
        mov     r11, QWORD PTR [rdx+24]
        sbb     r11, QWORD PTR [rsp+24]
        mov     QWORD PTR [rcx+16], r10
        mov     r10, QWORD PTR [rdx+32]
        sbb     r10, QWORD PTR [rsp+32]
        mov     QWORD PTR [rcx+24], r11
        mov     r11, QWORD PTR [rdx+40]
        sbb     r11, QWORD PTR [rsp+40]
        mov     QWORD PTR [rcx+32], r10
        mov     QWORD PTR [rcx+40], r11
        ; rax = 0 - final borrow
        sbb     rax, rax
        add     rsp, 48
        ret
sp_384_cond_sub_6 ENDP
_text ENDS
|
|
|
; /* Reduce a 12-word number back to 384 bits using Montgomery reduction.
; *
; * The modulus and multiplier are baked into the instruction stream: the
; * registers carrying m and mp are overwritten without ever being read, and
; * each round builds its multiplier from shifts and adds instead of mul.
; * Two words are folded away per round, three rounds in total.
; *
; * a   (rcx)  Twelve-word number; the six-word result is written to a[0..5].
; * m   (rdx)  Modulus - not read by this routine.
; * mp  (r8)   Negative inverse of m mod 2^n - not read by this routine.
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_6 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        push    rbx
        push    rbp
        ; Working window: a[0..5] in r12, r13, r14, r15, rdi, rsi
        mov     r12, QWORD PTR [rcx]
        mov     r13, QWORD PTR [rcx+8]
        mov     r14, QWORD PTR [rcx+16]
        mov     r15, QWORD PTR [rcx+24]
        mov     rdi, QWORD PTR [rcx+32]
        mov     rsi, QWORD PTR [rcx+40]
        ; r11 collects the carry/borrow that leaves the top of the window
        xor     r11, r11
        ; ---- Round 1: fold a[0..1] into a[0-7] ----
        mov     rbx, QWORD PTR [rcx+48]
        mov     rbp, QWORD PTR [rcx+56]
        ; rax:rdx = mu = (a[1]:a[0] << 32) + a[1]:a[0] + (a[0] << 64)
        mov     rdx, r12
        mov     rax, r13
        shld    rax, rdx, 32
        shl     rdx, 32
        add     rdx, r12
        adc     rax, r13
        add     rax, r12
        ; r10:r9:r8 = mu << 32
        mov     r8, rdx
        mov     r9, rax
        mov     r10, rax
        shld    r9, r8, 32
        shl     r8, 32
        shr     r10, 32
        ; window += (mu << 32) at word 0 and mu at word 6, one carry chain
        add     r12, r8
        adc     r13, r9
        adc     r14, r10
        adc     r15, 0
        adc     rdi, 0
        adc     rsi, 0
        adc     rbx, rdx
        adc     rbp, rax
        adc     r11, 0
        ; rax:r10:r9 = upper words of (mu << 32) + (rax:rdx:rax)
        add     r8, rax
        adc     r9, rdx
        adc     r10, rax
        mov     rax, 0
        adc     rax, 0
        ; window -= that value, starting at word 2
        sub     r14, r9
        sbb     r15, r10
        sbb     rdi, rax
        sbb     rsi, 0
        sbb     rbx, 0
        sbb     rbp, 0
        sbb     r11, 0
        ; ---- Round 2: fold a[2..3] into a[2-9] ----
        mov     r12, QWORD PTR [rcx+64]
        mov     r13, QWORD PTR [rcx+72]
        mov     rdx, r14
        mov     rax, r15
        shld    rax, rdx, 32
        shl     rdx, 32
        add     rdx, r14
        adc     rax, r15
        add     rax, r14
        mov     r8, rdx
        mov     r9, rax
        mov     r10, rax
        shld    r9, r8, 32
        shl     r8, 32
        shr     r10, 32
        ; bring the carry of the previous round into the new top words
        add     r12, r11
        adc     r13, 0
        mov     r11, 0
        adc     r11, 0
        add     r14, r8
        adc     r15, r9
        adc     rdi, r10
        adc     rsi, 0
        adc     rbx, 0
        adc     rbp, 0
        adc     r12, rdx
        adc     r13, rax
        adc     r11, 0
        add     r8, rax
        adc     r9, rdx
        adc     r10, rax
        mov     rax, 0
        adc     rax, 0
        sub     rdi, r9
        sbb     rsi, r10
        sbb     rbx, rax
        sbb     rbp, 0
        sbb     r12, 0
        sbb     r13, 0
        sbb     r11, 0
        ; ---- Round 3: fold a[4..5] into a[4-11] ----
        mov     r14, QWORD PTR [rcx+80]
        mov     r15, QWORD PTR [rcx+88]
        mov     rdx, rdi
        mov     rax, rsi
        shld    rax, rdx, 32
        shl     rdx, 32
        add     rdx, rdi
        adc     rax, rsi
        add     rax, rdi
        mov     r8, rdx
        mov     r9, rax
        mov     r10, rax
        shld    r9, r8, 32
        shl     r8, 32
        shr     r10, 32
        add     r14, r11
        adc     r15, 0
        mov     r11, 0
        adc     r11, 0
        add     rdi, r8
        adc     rsi, r9
        adc     rbx, r10
        adc     rbp, 0
        adc     r12, 0
        adc     r13, 0
        adc     r14, rdx
        adc     r15, rax
        adc     r11, 0
        add     r8, rax
        adc     r9, rdx
        adc     r10, rax
        mov     rax, 0
        adc     rax, 0
        sub     rbx, r9
        sbb     rbp, r10
        sbb     r12, rax
        sbb     r13, 0
        sbb     r14, 0
        sbb     r15, 0
        sbb     r11, 0
        ; ---- Subtract the modulus if a carry is left over ----
        ; r11 becomes the mask; r8, r9, r10 are the three low modulus words
        ; under that mask and r11 itself serves as the three high words.
        neg     r11
        mov     r10, 0FFFFFFFFFFFFFFFEh
        mov     r8d, r11d
        mov     r9, r11
        and     r10, r11
        shl     r9, 32
        sub     rbx, r8
        sbb     rbp, r9
        sbb     r12, r10
        sbb     r13, r11
        sbb     r14, r11
        sbb     r15, r11
        ; The reduced value lives in rbx, rbp, r12..r15
        mov     QWORD PTR [rcx], rbx
        mov     QWORD PTR [rcx+8], rbp
        mov     QWORD PTR [rcx+16], r12
        mov     QWORD PTR [rcx+24], r13
        mov     QWORD PTR [rcx+32], r14
        mov     QWORD PTR [rcx+40], r15
        pop     rbp
        pop     rbx
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_384_mont_reduce_6 ENDP
_text ENDS
|
|
|
; /* Reduce the number back to 384 bits using Montgomery reduction.
; *
; * Generic word-by-word variant: unlike sp_384_mont_reduce_6 it reads the
; * modulus and mp from its arguments. Six iterations each add m * mu at the
; * current word, then sp_384_cond_sub_6 subtracts m once if a carry is left.
; *
; * a   (rcx)  Twelve-word number; the result is written to a[0..5].
; * m   (rdx)  Six-word modulus.
; * mp  (r8)   The digit representing the negative inverse of m mod 2^n.
; */
_text SEGMENT READONLY PARA
sp_384_mont_reduce_order_6 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi
        ; mul overwrites rdx, so the modulus pointer is kept in r9
        mov     r9, rdx
        ; rsi = carry out of the top word, carried between iterations
        xor     rsi, rsi
        ; i = 6
        mov     r10, 6
        ; The two lowest live words are cached in r15 (a[i]) and rdi (a[i+1])
        mov     r15, QWORD PTR [rcx]
        mov     rdi, QWORD PTR [rcx+8]
L_384_mont_reduce_order_6_loop:
        ; mu = a[i] * mp
        mov     r13, r15
        imul    r13, r8
        ; a[i+0] += m[0] * mu   (only the carry in r12 is kept)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9]
        add     r15, rax
        adc     r12, rdx
        ; a[i+1] += m[1] * mu   (result becomes the next a[i] in r15)
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+8]
        mov     r15, rdi
        add     r15, rax
        adc     r11, rdx
        add     r15, r12
        adc     r11, 0
        ; a[i+2] += m[2] * mu   (result becomes the next a[i+1] in rdi)
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+16]
        mov     rdi, QWORD PTR [rcx+16]
        add     rdi, rax
        adc     r12, rdx
        add     rdi, r11
        adc     r12, 0
        ; a[i+3] += m[3] * mu
        mov     rax, r13
        xor     r11, r11
        mul     QWORD PTR [r9+24]
        mov     r14, QWORD PTR [rcx+24]
        add     r14, rax
        adc     r11, rdx
        add     r14, r12
        mov     QWORD PTR [rcx+24], r14
        adc     r11, 0
        ; a[i+4] += m[4] * mu
        mov     rax, r13
        xor     r12, r12
        mul     QWORD PTR [r9+32]
        mov     r14, QWORD PTR [rcx+32]
        add     r14, rax
        adc     r12, rdx
        add     r14, r11
        mov     QWORD PTR [rcx+32], r14
        adc     r12, 0
        ; a[i+5] += m[5] * mu, and a[i+6] takes the high word plus the
        ; carry saved in rsi by the previous iteration
        mov     rax, r13
        mul     QWORD PTR [r9+40]
        mov     r14, QWORD PTR [rcx+40]
        add     r12, rax
        adc     rdx, rsi
        mov     rsi, 0
        adc     rsi, 0
        add     r14, r12
        mov     QWORD PTR [rcx+40], r14
        adc     QWORD PTR [rcx+48], rdx
        adc     rsi, 0
        ; i -= 1   (the window slides up by one word)
        add     rcx, 8
        dec     r10
        jnz     L_384_mont_reduce_order_6_loop
        ; rcx now points at a[6]; write back the two cached words
        mov     QWORD PTR [rcx], r15
        mov     QWORD PTR [rcx+8], rdi
        ; Turn the final carry into a mask: 0 or -1
        neg     rsi
        ; sp_384_cond_sub_6(r = a, a = a + 6 words, b = m, m = mask)
        ; NOTE(review): the call is made without Win64 shadow space and with
        ; rsp not 16-byte aligned. The callee in this file is a leaf that
        ; uses neither - re-check if that routine ever changes.
        mov     r8, r9
        mov     r9, rsi
        mov     rdx, rcx
        sub     rcx, 48
        call    sp_384_cond_sub_6
        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_384_mont_reduce_order_6 ENDP
_text ENDS
|
|
|
; /* Compare a with b without branching on the data.
; *
; * a  (rcx)  Six-word integer.
; * b  (rdx)  Six-word integer.
; *
; * Returns rax = -1, 0 or 1 when a is less than, equal to or greater than b
; * respectively.
; */
_text SEGMENT READONLY PARA
sp_384_cmp_6 PROC
        push    r12
        xor     r9, r9                  ; r9  = 0, replaces the mask once words differ
        mov     r8, -1                  ; r8  = mask, -1 while all words so far are equal
        mov     rax, -1                 ; rax = running result
        mov     r10, 1                  ; r10 = result for a > b
        ; Walk from the most significant word down. Once a difference has
        ; been seen the mask is 0, both operands become 0 and the result is
        ; left as it is.
        FOR off, <40, 32, 24, 16, 8, 0>
        mov     r11, QWORD PTR [rcx+off]
        mov     r12, QWORD PTR [rdx+off]
        and     r11, r8
        and     r12, r8
        sub     r11, r12
        cmova   rax, r10
        cmovc   rax, r8
        cmovnz  r8, r9
        ENDM
        ; All words equal: mask is still -1 and cancels the initial -1 to 0.
        ; Otherwise the mask is 0 and rax keeps its -1 or 1.
        xor     rax, r8
        pop     r12
        ret
sp_384_cmp_6 ENDP
_text ENDS
|
|
|
; /* Add two Montgomery form numbers (r = a + b % m).
; *
; * The modulus words are hard-coded constants; the register that carries m
; * is reused as a scratch register before it is ever read.
; *
; * r  (rcx)  Result of addition.
; * a  (rdx)  First number to add in Montgomery form.
; * b  (r8)   Second number to add in Montgomery form.
; * m  (r9)   Modulus (prime) - not read by this routine.
; */
_text SEGMENT READONLY PARA
sp_384_mont_add_6 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; Sum lives in rax, r9, r10, r11, r12, r13
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     r13, QWORD PTR [rdx+40]
        ; Modulus words 1 and 2; word 0 is derived from the mask below and
        ; words 3..5 are the mask itself.
        mov     r15, 0FFFFFFFF00000000h
        mov     rdi, 0FFFFFFFFFFFFFFFEh
        add     rax, QWORD PTR [r8]
        adc     r9, QWORD PTR [r8+8]
        adc     r10, QWORD PTR [r8+16]
        adc     r11, QWORD PTR [r8+24]
        adc     r12, QWORD PTR [r8+32]
        adc     r13, QWORD PTR [r8+40]
        ; rdx = -1 if the addition carried out of the top word, else 0
        sbb     rdx, rdx
        ; r14 = low 32 bits of the mask = masked modulus word 0
        mov     r14d, edx
        and     r15, rdx
        and     rdi, rdx
        ; First masked subtraction of the modulus
        sub     rax, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdx
        sbb     r12, rdx
        sbb     r13, rdx
        ; mask += borrow: stays -1 only if that subtraction did not borrow
        adc     rdx, 0
        and     r14, rdx
        and     r15, rdx
        and     rdi, rdx
        ; Second masked subtraction
        sub     rax, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdx
        sbb     r12, rdx
        sbb     r13, rdx
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r13
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_384_mont_add_6 ENDP
_text ENDS
|
|
|
; /* Double a Montgomery form number (r = a + a % m).
; *
; * The modulus words are hard-coded constants; the register that carries m
; * is reused as a scratch register before it is ever read.
; *
; * r  (rcx)  Result of doubling.
; * a  (rdx)  Number to double in Montgomery form.
; * m  (r8)   Modulus (prime) - not read by this routine.
; */
_text SEGMENT READONLY PARA
sp_384_mont_dbl_6 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; Value lives in rax, r8, r9, r10, r11, r12
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+40]
        ; The bit shifted out by the doubling is the top bit of a[5];
        ; smear it into a full-width mask in rdi before r12 is changed.
        mov     rdi, r12
        sar     rdi, 63
        ; Modulus words 1 and 2
        mov     r14, 0FFFFFFFF00000000h
        mov     r15, 0FFFFFFFFFFFFFFFEh
        add     rax, rax
        adc     r8, r8
        adc     r9, r9
        adc     r10, r10
        adc     r11, r11
        adc     r12, r12
        ; r13 = low 32 bits of the mask = masked modulus word 0
        mov     r13d, edi
        and     r14, rdi
        and     r15, rdi
        ; First masked subtraction of the modulus
        sub     rax, r13
        sbb     r8, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdi
        sbb     r12, rdi
        ; mask += borrow: stays -1 only if that subtraction did not borrow
        adc     rdi, 0
        and     r13, rdi
        and     r14, rdi
        and     r15, rdi
        ; Second masked subtraction
        sub     rax, r13
        sbb     r8, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdi
        sbb     r12, rdi
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r12
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_384_mont_dbl_6 ENDP
_text ENDS
|
|
|
; /* Triple a Montgomery form number (r = a + a + a % m).
; *
; * Computed as a doubling followed by one more addition of a, with the
; * masked modulus subtraction applied after each step. The modulus words
; * are hard-coded constants; the register that carries m is reused as a
; * scratch register before it is ever read.
; *
; * a is read a second time after the doubling, so nothing is written to r
; * until the very end. This keeps the routine correct when r and a are the
; * same buffer.
; *
; * r  (rcx)  Result of tripling.
; * a  (rdx)  Number to triple in Montgomery form.
; * m  (r8)   Modulus (prime) - not read by this routine.
; */
_text SEGMENT READONLY PARA
sp_384_mont_tpl_6 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; Value lives in rax, r8, r9, r10, r11, r12
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+40]
        ; ---- Step 1: t = 2a mod m ----
        ; Modulus words 1 and 2
        mov     r14, 0FFFFFFFF00000000h
        mov     r15, 0FFFFFFFFFFFFFFFEh
        add     rax, rax
        adc     r8, r8
        adc     r9, r9
        adc     r10, r10
        adc     r11, r11
        adc     r12, r12
        ; rdi = -1 if the doubling carried out of the top word, else 0
        sbb     rdi, rdi
        ; r13 = low 32 bits of the mask = masked modulus word 0
        mov     r13d, edi
        and     r14, rdi
        and     r15, rdi
        sub     rax, r13
        sbb     r8, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdi
        sbb     r12, rdi
        ; mask += borrow: stays -1 only if that subtraction did not borrow
        adc     rdi, 0
        and     r13, rdi
        and     r14, rdi
        and     r15, rdi
        sub     rax, r13
        sbb     r8, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdi
        sbb     r12, rdi
        ; ---- Step 2: r = t + a mod m ----
        ; Reload the modulus words that were masked during step 1
        mov     r14, 0FFFFFFFF00000000h
        mov     r15, 0FFFFFFFFFFFFFFFEh
        add     rax, QWORD PTR [rdx]
        adc     r8, QWORD PTR [rdx+8]
        adc     r9, QWORD PTR [rdx+16]
        adc     r10, QWORD PTR [rdx+24]
        adc     r11, QWORD PTR [rdx+32]
        adc     r12, QWORD PTR [rdx+40]
        sbb     rdi, rdi
        mov     r13d, edi
        and     r14, rdi
        and     r15, rdi
        sub     rax, r13
        sbb     r8, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdi
        sbb     r12, rdi
        adc     rdi, 0
        and     r13, rdi
        and     r14, rdi
        and     r15, rdi
        sub     rax, r13
        sbb     r8, r14
        sbb     r9, r15
        sbb     r10, rdi
        sbb     r11, rdi
        sbb     r12, rdi
        ; All reads of a are done; publish the result
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r12
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_384_mont_tpl_6 ENDP
_text ENDS
|
|
|
; /* Subtract two Montgomery form numbers (r = a - b % m).
; *
; * The modulus words are hard-coded constants; the register that carries m
; * is reused as a scratch register before it is ever read.
; *
; * r  (rcx)  Result of subtraction.
; * a  (rdx)  Number to subtract from in Montgomery form.
; * b  (r8)   Number to subtract with in Montgomery form.
; * m  (r9)   Modulus (prime) - not read by this routine.
; */
_text SEGMENT READONLY PARA
sp_384_mont_sub_6 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; Difference lives in rax, r9, r10, r11, r12, r13
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     r13, QWORD PTR [rdx+40]
        ; Modulus words 1 and 2; word 0 is derived from the mask below and
        ; words 3..5 are the mask itself.
        mov     r15, 0FFFFFFFF00000000h
        mov     rdi, 0FFFFFFFFFFFFFFFEh
        sub     rax, QWORD PTR [r8]
        sbb     r9, QWORD PTR [r8+8]
        sbb     r10, QWORD PTR [r8+16]
        sbb     r11, QWORD PTR [r8+24]
        sbb     r12, QWORD PTR [r8+32]
        sbb     r13, QWORD PTR [r8+40]
        ; rdx = -1 if the subtraction borrowed out of the top word, else 0
        sbb     rdx, rdx
        ; r14 = low 32 bits of the mask = masked modulus word 0
        mov     r14d, edx
        and     r15, rdx
        and     rdi, rdx
        ; First masked addition of the modulus
        add     rax, r14
        adc     r9, r15
        adc     r10, rdi
        adc     r11, rdx
        adc     r12, rdx
        adc     r13, rdx
        ; mask += carry: stays -1 only if that addition did not carry out
        adc     rdx, 0
        and     r14, rdx
        and     r15, rdx
        and     rdi, rdx
        ; Second masked addition
        add     rax, r14
        adc     r9, r15
        adc     r10, rdi
        adc     r11, rdx
        adc     r12, rdx
        adc     r13, rdx
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r13
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_384_mont_sub_6 ENDP
_text ENDS
|
|
|
; /* Halve a number modulo the prime (r = a / 2 % m).
; *
; * When a is odd the modulus is added first so that the sum is even; the
; * seven-word sum (six words plus carry) is then shifted right by one bit.
; * The choice is made with a mask, not a branch.
; *
; * r  (rcx)  Result of division by 2.
; * a  (rdx)  Number to divide.
; * m  (r8)   Modulus (prime), read from memory here.
; */
_text SEGMENT READONLY PARA
sp_384_mont_div2_6 PROC
        push    r12
        push    r13
        sub     rsp, 48
        mov     r13, QWORD PTR [rdx]
        xor     r12, r12                ; r12 = carry word of the sum
        mov     rax, r13                ; rax = a[0], kept for the add below
        ; r13 = -1 when a is odd, 0 when even
        and     r13, 1
        neg     r13
        ; stack[0..5] = m & mask
        FOR off, <0, 8, 16, 24, 32, 40>
        mov     r10, QWORD PTR [r8+off]
        and     r10, r13
        mov     QWORD PTR [rsp+off], r10
        ENDM
        ; stack[0..5] += a, final carry into r12
        add     QWORD PTR [rsp], rax
        FOR off, <8, 16, 24, 32, 40>
        mov     rax, QWORD PTR [rdx+off]
        adc     QWORD PTR [rsp+off], rax
        ENDM
        adc     r12, 0
        ; Shift right one bit, each word taking its new top bit from the
        ; word above; rax and r9 alternate as the pair of live words.
        mov     rax, QWORD PTR [rsp]
        mov     r9, QWORD PTR [rsp+8]
        shrd    rax, r9, 1
        mov     QWORD PTR [rcx], rax
        mov     rax, QWORD PTR [rsp+16]
        shrd    r9, rax, 1
        mov     QWORD PTR [rcx+8], r9
        mov     r9, QWORD PTR [rsp+24]
        shrd    rax, r9, 1
        mov     QWORD PTR [rcx+16], rax
        mov     rax, QWORD PTR [rsp+32]
        shrd    r9, rax, 1
        mov     QWORD PTR [rcx+24], r9
        mov     r9, QWORD PTR [rsp+40]
        shrd    rax, r9, 1
        mov     QWORD PTR [rcx+32], rax
        shrd    r9, r12, 1
        mov     QWORD PTR [rcx+40], r9
        add     rsp, 48
        pop     r13
        pop     r12
        ret
sp_384_mont_div2_6 ENDP
_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
; /* Copy table entry idx into r while reading every candidate entry, so the
; * memory access pattern does not depend on idx.
; *
; * Entries are 296 bytes apart. Three 48-byte fields are copied per entry,
; * at byte offsets 0, 96 and 192. Entries 1..32 are scanned (the entry at
; * offset 0 of the table is skipped); an idx outside 1..32 leaves the three
; * fields of r zeroed. The first pass gathers the fields at 0 and 96, the
; * second pass rewinds the table pointer and gathers the field at 192.
; *
; * Baseline variant: only SSE2 instructions are used, including the
; * save/restore of the callee-saved xmm6-xmm15, so it does not need AVX.
; *
; * r      (rcx)  Point to copy into.
; * table  (rdx)  Table - start of the entries to access
; * idx    (r8d)  Index of point to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_point_33_6 PROC
        sub     rsp, 160
        movdqu  OWORD PTR [rsp], xmm6
        movdqu  OWORD PTR [rsp+16], xmm7
        movdqu  OWORD PTR [rsp+32], xmm8
        movdqu  OWORD PTR [rsp+48], xmm9
        movdqu  OWORD PTR [rsp+64], xmm10
        movdqu  OWORD PTR [rsp+80], xmm11
        movdqu  OWORD PTR [rsp+96], xmm12
        movdqu  OWORD PTR [rsp+112], xmm13
        movdqu  OWORD PTR [rsp+128], xmm14
        movdqu  OWORD PTR [rsp+144], xmm15
        ; xmm13 = idx in every lane, xmm15 = 1 in every lane
        mov     rax, 1
        movd    xmm13, r8d
        add     rdx, 296                ; start at entry 1
        movd    xmm15, eax
        mov     rax, 32                 ; entries to scan
        pshufd  xmm15, xmm15, 0
        pshufd  xmm13, xmm13, 0
        ; xmm0-xmm5 accumulate the selected words
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        movdqa  xmm14, xmm15            ; xmm14 = current entry number, from 1
L_384_get_point_33_6_start_1:
        ; xmm12 = all ones when this entry is the wanted one, else zero
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        movdqu  xmm6, [rdx]
        movdqu  xmm7, [rdx+16]
        movdqu  xmm8, [rdx+32]
        movdqu  xmm9, [rdx+96]
        movdqu  xmm10, [rdx+112]
        movdqu  xmm11, [rdx+128]
        add     rdx, 296
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        dec     rax
        jnz     L_384_get_point_33_6_start_1
        movdqu  [rcx], xmm0
        movdqu  [rcx+16], xmm1
        movdqu  [rcx+32], xmm2
        movdqu  [rcx+96], xmm3
        movdqu  [rcx+112], xmm4
        movdqu  [rcx+128], xmm5
        ; Second pass over the same 32 entries for the field at offset 192
        mov     rax, 1
        movd    xmm13, r8d
        sub     rdx, 9472               ; 32 * 296: back to entry 1
        movd    xmm15, eax
        mov     rax, 32
        pshufd  xmm15, xmm15, 0
        pshufd  xmm13, xmm13, 0
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        movdqa  xmm14, xmm15
L_384_get_point_33_6_start_2:
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        movdqu  xmm6, [rdx+192]
        movdqu  xmm7, [rdx+208]
        movdqu  xmm8, [rdx+224]
        add     rdx, 296
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        dec     rax
        jnz     L_384_get_point_33_6_start_2
        movdqu  [rcx+192], xmm0
        movdqu  [rcx+208], xmm1
        movdqu  [rcx+224], xmm2
        movdqu  xmm6, OWORD PTR [rsp]
        movdqu  xmm7, OWORD PTR [rsp+16]
        movdqu  xmm8, OWORD PTR [rsp+32]
        movdqu  xmm9, OWORD PTR [rsp+48]
        movdqu  xmm10, OWORD PTR [rsp+64]
        movdqu  xmm11, OWORD PTR [rsp+80]
        movdqu  xmm12, OWORD PTR [rsp+96]
        movdqu  xmm13, OWORD PTR [rsp+112]
        movdqu  xmm14, OWORD PTR [rsp+128]
        movdqu  xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
sp_384_get_point_33_6 ENDP
_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
; /* AVX2 variant: copy table entry idx into r while reading every candidate
; * entry, so the memory access pattern does not depend on idx.
; *
; * Entries are 296 bytes apart. Three 48-byte fields are copied per entry,
; * at byte offsets 0, 96 and 192, each as one 32-byte plus one 16-byte
; * access. Entries 1..32 are scanned in a single pass (the entry at offset 0
; * of the table is skipped); an idx outside 1..32 leaves the three fields
; * of r zeroed.
; *
; * Only VEX-encoded instructions are used, and vzeroupper is issued before
; * returning so the caller does not inherit dirty upper YMM halves.
; *
; * r      (rcx)  Point to copy into.
; * table  (rdx)  Table - start of the entries to access
; * idx    (r8d)  Index of point to retrieve.
; */
_text SEGMENT READONLY PARA
sp_384_get_point_33_avx2_6 PROC
        sub     rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15
        mov     rax, 1
        vmovd   xmm13, r8d
        add     rdx, 296                ; start at entry 1
        vmovd   xmm15, eax
        mov     rax, 32                 ; entries to scan
        ; An all-zero index vector makes vpermd copy lane 0 to every lane:
        ; ymm13 = idx in every lane, ymm15 = 1 in every lane
        vpxor   ymm14, ymm14, ymm14
        vpermd  ymm13, ymm14, ymm13
        vpermd  ymm15, ymm14, ymm15
        ; ymm0/xmm1, ymm2/xmm3, ymm4/xmm5 accumulate the three fields
        vpxor   ymm0, ymm0, ymm0
        vpxor   xmm1, xmm1, xmm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   xmm3, xmm3, xmm3
        vpxor   ymm4, ymm4, ymm4
        vpxor   xmm5, xmm5, xmm5
        vmovdqa ymm14, ymm15            ; ymm14 = current entry number, from 1
L_384_get_point_33_avx2_6_start:
        ; ymm12 = all ones when this entry is the wanted one, else zero
        vpcmpeqd ymm12, ymm14, ymm13
        vpaddd  ymm14, ymm14, ymm15
        vmovupd ymm6, YMMWORD PTR [rdx]
        vmovdqu xmm7, OWORD PTR [rdx+32]
        vmovupd ymm8, YMMWORD PTR [rdx+96]
        vmovdqu xmm9, OWORD PTR [rdx+128]
        vmovupd ymm10, YMMWORD PTR [rdx+192]
        vmovdqu xmm11, OWORD PTR [rdx+224]
        add     rdx, 296
        vpand   ymm6, ymm6, ymm12
        vpand   xmm7, xmm7, xmm12
        vpand   ymm8, ymm8, ymm12
        vpand   xmm9, xmm9, xmm12
        vpand   ymm10, ymm10, ymm12
        vpand   xmm11, xmm11, xmm12
        vpor    ymm0, ymm0, ymm6
        vpor    xmm1, xmm1, xmm7
        vpor    ymm2, ymm2, ymm8
        vpor    xmm3, xmm3, xmm9
        vpor    ymm4, ymm4, ymm10
        vpor    xmm5, xmm5, xmm11
        dec     rax
        jnz     L_384_get_point_33_avx2_6_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovdqu OWORD PTR [rcx+32], xmm1
        vmovupd YMMWORD PTR [rcx+96], ymm2
        vmovdqu OWORD PTR [rcx+128], xmm3
        vmovupd YMMWORD PTR [rcx+192], ymm4
        vmovdqu OWORD PTR [rcx+224], xmm5
        ; Clear the upper YMM halves before handing control back
        vzeroupper
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        ret
sp_384_get_point_33_avx2_6 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 384 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_mont_reduce_order_avx2_6 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- mov rax, rdx
|
|
|
- xor r15, r15
|
|
|
- mov r14, QWORD PTR [rcx]
|
|
|
- xor r13, r13
|
|
|
-L_mont_loop_order_avx2_6:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r11, r14
|
|
|
- imul rdx, r8
|
|
|
- xor r13, r13
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax]
|
|
|
- mov r14, QWORD PTR [rcx+8]
|
|
|
- adcx r11, r9
|
|
|
- adox r14, r10
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, QWORD PTR [rcx+16]
|
|
|
- adcx r14, r9
|
|
|
- adox r11, r10
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, QWORD PTR [rcx+24]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, QWORD PTR [rcx+32]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, QWORD PTR [rcx+40]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, QWORD PTR [rcx+48]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- mov r15, r13
|
|
|
- adox r15, r13
|
|
|
- adcx r15, r13
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r11, r14
|
|
|
- imul rdx, r8
|
|
|
- xor r13, r13
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax]
|
|
|
- mov r14, QWORD PTR [rcx+16]
|
|
|
- adcx r11, r9
|
|
|
- adox r14, r10
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- adcx r14, r9
|
|
|
- adox r11, r10
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, QWORD PTR [rcx+32]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, QWORD PTR [rcx+40]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+32], r12
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, QWORD PTR [rcx+48]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+48], r12
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov r15, r13
|
|
|
- adox r15, r13
|
|
|
- adcx r15, r13
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r11, r14
|
|
|
- imul rdx, r8
|
|
|
- xor r13, r13
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax]
|
|
|
- mov r14, QWORD PTR [rcx+24]
|
|
|
- adcx r11, r9
|
|
|
- adox r14, r10
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, QWORD PTR [rcx+32]
|
|
|
- adcx r14, r9
|
|
|
- adox r11, r10
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, QWORD PTR [rcx+40]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, QWORD PTR [rcx+48]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, QWORD PTR [rcx+56]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, QWORD PTR [rcx+64]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- mov r15, r13
|
|
|
- adox r15, r13
|
|
|
- adcx r15, r13
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r11, r14
|
|
|
- imul rdx, r8
|
|
|
- xor r13, r13
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax]
|
|
|
- mov r14, QWORD PTR [rcx+32]
|
|
|
- adcx r11, r9
|
|
|
- adox r14, r10
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, QWORD PTR [rcx+40]
|
|
|
- adcx r14, r9
|
|
|
- adox r11, r10
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, QWORD PTR [rcx+48]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+48], r12
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, QWORD PTR [rcx+64]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, QWORD PTR [rcx+72]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [rcx+72], r11
|
|
|
- mov r15, r13
|
|
|
- adox r15, r13
|
|
|
- adcx r15, r13
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r11, r14
|
|
|
- imul rdx, r8
|
|
|
- xor r13, r13
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax]
|
|
|
- mov r14, QWORD PTR [rcx+40]
|
|
|
- adcx r11, r9
|
|
|
- adox r14, r10
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, QWORD PTR [rcx+48]
|
|
|
- adcx r14, r9
|
|
|
- adox r11, r10
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, QWORD PTR [rcx+56]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, QWORD PTR [rcx+64]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, QWORD PTR [rcx+72]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, QWORD PTR [rcx+80]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+72], r12
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- mov r15, r13
|
|
|
- adox r15, r13
|
|
|
- adcx r15, r13
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r11, r14
|
|
|
- imul rdx, r8
|
|
|
- xor r13, r13
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax]
|
|
|
- mov r14, QWORD PTR [rcx+48]
|
|
|
- adcx r11, r9
|
|
|
- adox r14, r10
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- adcx r14, r9
|
|
|
- adox r11, r10
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, QWORD PTR [rcx+64]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, QWORD PTR [rcx+72]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, QWORD PTR [rcx+80]
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+72], r11
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, QWORD PTR [rcx+88]
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+80], r12
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov r15, r13
|
|
|
- adox r15, r13
|
|
|
- adcx r15, r13
|
|
|
- neg r15
|
|
|
- mov r8, rcx
|
|
|
- add rcx, 48
|
|
|
- mov r10, QWORD PTR [rax]
|
|
|
- mov rdx, r14
|
|
|
- pext r10, r10, r15
|
|
|
- sub rdx, r10
|
|
|
- mov r10, QWORD PTR [rax+8]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [r8], rdx
|
|
|
- sbb r9, r10
|
|
|
- mov rdx, QWORD PTR [rax+16]
|
|
|
- mov r10, QWORD PTR [rcx+16]
|
|
|
- pext rdx, rdx, r15
|
|
|
- mov QWORD PTR [r8+8], r9
|
|
|
- sbb r10, rdx
|
|
|
- mov r9, QWORD PTR [rax+24]
|
|
|
- mov rdx, QWORD PTR [rcx+24]
|
|
|
- pext r9, r9, r15
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- sbb rdx, r9
|
|
|
- mov r10, QWORD PTR [rax+32]
|
|
|
- mov r9, QWORD PTR [rcx+32]
|
|
|
- pext r10, r10, r15
|
|
|
- mov QWORD PTR [r8+24], rdx
|
|
|
- sbb r9, r10
|
|
|
- mov rdx, QWORD PTR [rax+40]
|
|
|
- mov r10, QWORD PTR [rcx+40]
|
|
|
- pext rdx, rdx, r15
|
|
|
- mov QWORD PTR [r8+32], r9
|
|
|
- sbb r10, rdx
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_mont_reduce_order_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m. (r = a - b when m is -1, r = a when m is 0)
|
|
|
-; * m is -1 to subtract and 0 when not subtracting.
|
|
|
-; * Returns in rax: 0 when the subtraction did not borrow, -1 when it did.
|
|
|
-; * r A single precision number representing condition subtract result. (rcx, 6 words written)
|
|
|
-; * a A single precision number to subtract from. (rdx)
|
|
|
-; * b A single precision number to subtract. (r8)
|
|
|
-; * m Mask value to apply. (r9)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_cond_sub_avx2_6 PROC
|
|
|
- push r12 ; r12 is used as a scratch register and restored before ret
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- pext r12, r12, r9 ; b[0] when m is all ones, 0 when m is zero
|
|
|
- sub r10, r12 ; word 0: starts the borrow chain
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext r12, r12, r9 ; masking with pext (like mov) leaves CF from the previous word intact
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- sbb r11, r12 ; word 1
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- sbb r12, r10 ; word 2
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- sbb r10, r11 ; word 3
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r11, r12 ; word 4
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- sbb r12, r10 ; word 5
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- sbb rax, rax ; rax = 0 - CF: 0 without a final borrow, -1 with one
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_cond_sub_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
|
|
|
-; * When a is odd, m is added first so the sum is even; the sum (with its carry) is then shifted right by one bit.
|
|
|
-; * r Result of division by 2. (rcx, 6 words)
|
|
|
-; * a Number to divide. (rdx, 6 words)
|
|
|
-; * m Modulus (prime). (r8, 6 words)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_mont_div2_avx2_6 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- mov r13, QWORD PTR [rdx] ; r13 = a[0]
|
|
|
- xor r12, r12 ; r12 will receive the carry out of the addition
|
|
|
- mov r10, r13 ; r10 is reloaded from [rdx] below before it is read
|
|
|
- and r13, 1
|
|
|
- neg r13 ; r13 = -1 when a is odd, 0 when a is even
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext rax, rax, r13 ; m[0] when a is odd, otherwise 0
|
|
|
- pext r9, r9, r13
|
|
|
- add r10, rax ; words 0 and 1 of a + (masked m)
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mov r9, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax ; words 2 and 3, continuing the carry chain
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mov r9, QWORD PTR [r8+40]
|
|
|
- mov r10, QWORD PTR [rdx+32]
|
|
|
- mov r11, QWORD PTR [rdx+40]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax ; words 4 and 5
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- adc r12, 0 ; r12 = carry out of the 6-word sum
|
|
|
- mov r10, QWORD PTR [rcx] ; second pass: shift the stored sum right by one, lowest word first
|
|
|
- mov r11, QWORD PTR [rcx+8]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- mov r10, QWORD PTR [rcx+16]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov r10, QWORD PTR [rcx+32]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov r11, QWORD PTR [rcx+40]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- shrd r11, r12, 1 ; the carry bit is shifted in as the top bit of the result
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_mont_div2_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-; /* Touch each possible entry that could be being copied.
|
|
|
-; * Every iteration loads one 96-byte entry and ORs it into the result under an all-ones or all-zero mask, so the loads do not depend on idx.
|
|
|
-; * r Point to copy into. (rcx; 48 bytes are written at r and 48 bytes at r + 96)
|
|
|
-; * table Table - start of the entries to access (rdx; the first 96 bytes are skipped)
|
|
|
-; * idx Index of entry to retrieve. (r8d; compared against indices 1..63, no match leaves the result zero)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_get_entry_64_6 PROC
|
|
|
- sub rsp, 160 ; room to save xmm6-xmm15 (10 x 16 bytes)
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- vmovdqu OWORD PTR [rsp+128], xmm14
|
|
|
- vmovdqu OWORD PTR [rsp+144], xmm15
|
|
|
- ; From entry 1: the entry at index 0 is never loaded
|
|
|
- mov rax, 1
|
|
|
- movd xmm13, r8d ; idx
|
|
|
- add rdx, 96 ; step over the first 96-byte entry
|
|
|
- movd xmm15, eax
|
|
|
- mov rax, 63 ; loop count: 63 entries are examined
|
|
|
- pshufd xmm15, xmm15, 0 ; xmm15 = 1 in every dword lane (index increment)
|
|
|
- pshufd xmm13, xmm13, 0 ; xmm13 = idx in every dword lane
|
|
|
- pxor xmm14, xmm14 ; overwritten by the movdqa below
|
|
|
- pxor xmm0, xmm0 ; xmm0-xmm5 accumulate the selected 96 bytes
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- movdqa xmm14, xmm15 ; xmm14 = index of the current entry, starting at 1
|
|
|
-L_384_get_entry_64_6_start_0:
|
|
|
- movdqa xmm12, xmm14
|
|
|
- paddd xmm14, xmm15 ; advance the index for the next iteration
|
|
|
- pcmpeqd xmm12, xmm13 ; xmm12 = all ones when the current index equals idx, else zero
|
|
|
- movdqu xmm6, [rdx]
|
|
|
- movdqu xmm7, [rdx+16]
|
|
|
- movdqu xmm8, [rdx+32]
|
|
|
- movdqu xmm9, [rdx+48]
|
|
|
- movdqu xmm10, [rdx+64]
|
|
|
- movdqu xmm11, [rdx+80]
|
|
|
- add rdx, 96 ; next entry
|
|
|
- pand xmm6, xmm12 ; keep the entry only when it is the selected one
|
|
|
- pand xmm7, xmm12
|
|
|
- pand xmm8, xmm12
|
|
|
- pand xmm9, xmm12
|
|
|
- pand xmm10, xmm12
|
|
|
- pand xmm11, xmm12
|
|
|
- por xmm0, xmm6
|
|
|
- por xmm1, xmm7
|
|
|
- por xmm2, xmm8
|
|
|
- por xmm3, xmm9
|
|
|
- por xmm4, xmm10
|
|
|
- por xmm5, xmm11
|
|
|
- dec rax
|
|
|
- jnz L_384_get_entry_64_6_start_0
|
|
|
- movdqu [rcx], xmm0 ; first 48 bytes of the entry -> r + 0
|
|
|
- movdqu [rcx+16], xmm1
|
|
|
- movdqu [rcx+32], xmm2
|
|
|
- movdqu [rcx+96], xmm3 ; second 48 bytes of the entry -> r + 96
|
|
|
- movdqu [rcx+112], xmm4
|
|
|
- movdqu [rcx+128], xmm5
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved xmm6-xmm15
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- vmovdqu xmm14, OWORD PTR [rsp+128]
|
|
|
- vmovdqu xmm15, OWORD PTR [rsp+144]
|
|
|
- add rsp, 160
|
|
|
- ret
|
|
|
-sp_384_get_entry_64_6 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Touch each possible entry that could be being copied.
|
|
|
-; * AVX2 variant: every iteration loads one 96-byte entry and ORs it into the result under an all-ones or all-zero mask.
|
|
|
-; * r Point to copy into. (rcx; 48 bytes are written at r and 48 bytes at r + 96)
|
|
|
-; * table Table - start of the entries to access (rdx; the first 96 bytes are skipped)
|
|
|
-; * idx Index of entry to retrieve. (r8d; compared against indices 1..64, no match leaves the result zero)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_get_entry_64_avx2_6 PROC
|
|
|
- sub rsp, 96 ; room to save xmm6-xmm11 (6 x 16 bytes)
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- mov rax, 1
|
|
|
- movd xmm9, r8d ; idx
|
|
|
- add rdx, 96 ; step over the first 96-byte entry
|
|
|
- movd xmm11, eax
|
|
|
- mov rax, 64 ; loop count - NOTE(review): sp_384_get_entry_64_6 runs 63 iterations, so this variant reads one entry more; confirm the table is large enough
|
|
|
- vpxor ymm10, ymm10, ymm10 ; all-zero permute indices
|
|
|
- vpermd ymm9, ymm10, ymm9 ; ymm9 = idx in every dword lane
|
|
|
- vpermd ymm11, ymm10, ymm11 ; ymm11 = 1 in every dword lane (index increment)
|
|
|
- vpxor ymm0, ymm0, ymm0 ; ymm0/xmm1 accumulate bytes 0..47, ymm2/xmm3 bytes 48..95
|
|
|
- vpxor xmm1, xmm1, xmm1
|
|
|
- vpxor ymm2, ymm2, ymm2
|
|
|
- vpxor xmm3, xmm3, xmm3
|
|
|
- vmovdqa ymm10, ymm11 ; ymm10 = index of the current entry, starting at 1
|
|
|
-L_384_get_entry_64_avx2_6_start:
|
|
|
- vpcmpeqd ymm8, ymm10, ymm9 ; ymm8 = all ones when the current index equals idx, else zero
|
|
|
- vpaddd ymm10, ymm10, ymm11 ; advance the index for the next iteration
|
|
|
- vmovupd ymm4, YMMWORD PTR [rdx]
|
|
|
- vmovdqu xmm5, OWORD PTR [rdx+32]
|
|
|
- vmovupd ymm6, YMMWORD PTR [rdx+48]
|
|
|
- vmovdqu xmm7, OWORD PTR [rdx+80]
|
|
|
- add rdx, 96 ; next entry
|
|
|
- vpand ymm4, ymm4, ymm8 ; keep the entry only when it is the selected one
|
|
|
- vpand xmm5, xmm5, xmm8
|
|
|
- vpand ymm6, ymm6, ymm8
|
|
|
- vpand xmm7, xmm7, xmm8
|
|
|
- vpor ymm0, ymm0, ymm4
|
|
|
- vpor xmm1, xmm1, xmm5
|
|
|
- vpor ymm2, ymm2, ymm6
|
|
|
- vpor xmm3, xmm3, xmm7
|
|
|
- dec rax
|
|
|
- jnz L_384_get_entry_64_avx2_6_start
|
|
|
- vmovupd YMMWORD PTR [rcx], ymm0 ; first 48 bytes of the entry -> r + 0
|
|
|
- vmovdqu OWORD PTR [rcx+32], xmm1
|
|
|
- vmovupd YMMWORD PTR [rcx+96], ymm2 ; second 48 bytes of the entry -> r + 96
|
|
|
- vmovdqu OWORD PTR [rcx+128], xmm3
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved low 128 bits of xmm6-xmm11
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- add rsp, 96
|
|
|
- ret
|
|
|
-sp_384_get_entry_64_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-; /* Touch each possible entry that could be being copied.
|
|
|
-; * Every iteration loads one 96-byte entry and ORs it into the result under an all-ones or all-zero mask, so the loads do not depend on idx.
|
|
|
-; * r Point to copy into. (rcx; 48 bytes are written at r and 48 bytes at r + 96)
|
|
|
-; * table Table - start of the entries to access (rdx; the first 96 bytes are skipped)
|
|
|
-; * idx Index of entry to retrieve. (r8d; compared against indices 1..64, no match leaves the result zero)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_get_entry_65_6 PROC
|
|
|
- sub rsp, 160 ; room to save xmm6-xmm15 (10 x 16 bytes)
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- vmovdqu OWORD PTR [rsp+128], xmm14
|
|
|
- vmovdqu OWORD PTR [rsp+144], xmm15
|
|
|
- ; From entry 1: the entry at index 0 is never loaded
|
|
|
- mov rax, 1
|
|
|
- movd xmm13, r8d ; idx
|
|
|
- add rdx, 96 ; step over the first 96-byte entry
|
|
|
- movd xmm15, eax
|
|
|
- mov rax, 64 ; loop count: 64 entries are examined
|
|
|
- pshufd xmm15, xmm15, 0 ; xmm15 = 1 in every dword lane (index increment)
|
|
|
- pshufd xmm13, xmm13, 0 ; xmm13 = idx in every dword lane
|
|
|
- pxor xmm14, xmm14 ; overwritten by the movdqa below
|
|
|
- pxor xmm0, xmm0 ; xmm0-xmm5 accumulate the selected 96 bytes
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- pxor xmm4, xmm4
|
|
|
- pxor xmm5, xmm5
|
|
|
- movdqa xmm14, xmm15 ; xmm14 = index of the current entry, starting at 1
|
|
|
-L_384_get_entry_65_6_start_0:
|
|
|
- movdqa xmm12, xmm14
|
|
|
- paddd xmm14, xmm15 ; advance the index for the next iteration
|
|
|
- pcmpeqd xmm12, xmm13 ; xmm12 = all ones when the current index equals idx, else zero
|
|
|
- movdqu xmm6, [rdx]
|
|
|
- movdqu xmm7, [rdx+16]
|
|
|
- movdqu xmm8, [rdx+32]
|
|
|
- movdqu xmm9, [rdx+48]
|
|
|
- movdqu xmm10, [rdx+64]
|
|
|
- movdqu xmm11, [rdx+80]
|
|
|
- add rdx, 96 ; next entry
|
|
|
- pand xmm6, xmm12 ; keep the entry only when it is the selected one
|
|
|
- pand xmm7, xmm12
|
|
|
- pand xmm8, xmm12
|
|
|
- pand xmm9, xmm12
|
|
|
- pand xmm10, xmm12
|
|
|
- pand xmm11, xmm12
|
|
|
- por xmm0, xmm6
|
|
|
- por xmm1, xmm7
|
|
|
- por xmm2, xmm8
|
|
|
- por xmm3, xmm9
|
|
|
- por xmm4, xmm10
|
|
|
- por xmm5, xmm11
|
|
|
- dec rax
|
|
|
- jnz L_384_get_entry_65_6_start_0
|
|
|
- movdqu [rcx], xmm0 ; first 48 bytes of the entry -> r + 0
|
|
|
- movdqu [rcx+16], xmm1
|
|
|
- movdqu [rcx+32], xmm2
|
|
|
- movdqu [rcx+96], xmm3 ; second 48 bytes of the entry -> r + 96
|
|
|
- movdqu [rcx+112], xmm4
|
|
|
- movdqu [rcx+128], xmm5
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved xmm6-xmm15
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- vmovdqu xmm14, OWORD PTR [rsp+128]
|
|
|
- vmovdqu xmm15, OWORD PTR [rsp+144]
|
|
|
- add rsp, 160
|
|
|
- ret
|
|
|
-sp_384_get_entry_65_6 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Touch each possible entry that could be being copied.
|
|
|
-; * AVX2 variant: every iteration loads one 96-byte entry and ORs it into the result under an all-ones or all-zero mask.
|
|
|
-; * r Point to copy into. (rcx; 48 bytes are written at r and 48 bytes at r + 96)
|
|
|
-; * table Table - start of the entries to access (rdx; the first 96 bytes are skipped)
|
|
|
-; * idx Index of entry to retrieve. (r8d; compared against indices 1..65, no match leaves the result zero)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_get_entry_65_avx2_6 PROC
|
|
|
- sub rsp, 96 ; room to save xmm6-xmm11 (6 x 16 bytes)
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- mov rax, 1
|
|
|
- movd xmm9, r8d ; idx
|
|
|
- add rdx, 96 ; step over the first 96-byte entry
|
|
|
- movd xmm11, eax
|
|
|
- mov rax, 65 ; loop count - NOTE(review): sp_384_get_entry_65_6 runs 64 iterations, so this variant reads one entry more; confirm the table is large enough
|
|
|
- vpxor ymm10, ymm10, ymm10 ; all-zero permute indices
|
|
|
- vpermd ymm9, ymm10, ymm9 ; ymm9 = idx in every dword lane
|
|
|
- vpermd ymm11, ymm10, ymm11 ; ymm11 = 1 in every dword lane (index increment)
|
|
|
- vpxor ymm0, ymm0, ymm0 ; ymm0/xmm1 accumulate bytes 0..47, ymm2/xmm3 bytes 48..95
|
|
|
- vpxor xmm1, xmm1, xmm1
|
|
|
- vpxor ymm2, ymm2, ymm2
|
|
|
- vpxor xmm3, xmm3, xmm3
|
|
|
- vmovdqa ymm10, ymm11 ; ymm10 = index of the current entry, starting at 1
|
|
|
-L_384_get_entry_65_avx2_6_start:
|
|
|
- vpcmpeqd ymm8, ymm10, ymm9 ; ymm8 = all ones when the current index equals idx, else zero
|
|
|
- vpaddd ymm10, ymm10, ymm11 ; advance the index for the next iteration
|
|
|
- vmovupd ymm4, YMMWORD PTR [rdx]
|
|
|
- vmovdqu xmm5, OWORD PTR [rdx+32]
|
|
|
- vmovupd ymm6, YMMWORD PTR [rdx+48]
|
|
|
- vmovdqu xmm7, OWORD PTR [rdx+80]
|
|
|
- add rdx, 96 ; next entry
|
|
|
- vpand ymm4, ymm4, ymm8 ; keep the entry only when it is the selected one
|
|
|
- vpand xmm5, xmm5, xmm8
|
|
|
- vpand ymm6, ymm6, ymm8
|
|
|
- vpand xmm7, xmm7, xmm8
|
|
|
- vpor ymm0, ymm0, ymm4
|
|
|
- vpor xmm1, xmm1, xmm5
|
|
|
- vpor ymm2, ymm2, ymm6
|
|
|
- vpor xmm3, xmm3, xmm7
|
|
|
- dec rax
|
|
|
- jnz L_384_get_entry_65_avx2_6_start
|
|
|
- vmovupd YMMWORD PTR [rcx], ymm0 ; first 48 bytes of the entry -> r + 0
|
|
|
- vmovdqu OWORD PTR [rcx+32], xmm1
|
|
|
- vmovupd YMMWORD PTR [rcx+96], ymm2 ; second 48 bytes of the entry -> r + 96
|
|
|
- vmovdqu OWORD PTR [rcx+128], xmm3
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the saved low 128 bits of xmm6-xmm11
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- add rsp, 96
|
|
|
- ret
|
|
|
-sp_384_get_entry_65_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-; /* Add 1 to a. (a = a + 1)
|
|
|
-; * The increment is done in place over 6 words; rax is not set by this routine.
|
|
|
-; * a A single precision integer. (rcx, 6 words, updated in memory)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_add_one_6 PROC
|
|
|
- add QWORD PTR [rcx], 1 ; word 0: add the 1 and start the carry chain
|
|
|
- adc QWORD PTR [rcx+8], 0 ; words 1..5 only absorb the carry
|
|
|
- adc QWORD PTR [rcx+16], 0
|
|
|
- adc QWORD PTR [rcx+24], 0
|
|
|
- adc QWORD PTR [rcx+32], 0
|
|
|
- adc QWORD PTR [rcx+40], 0
|
|
|
- ret
|
|
|
-sp_384_add_one_6 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Read big endian unsigned byte array into r.
|
|
|
-; * Uses the bswap instruction.
|
|
|
-; * Words are written least significant first, reading a from its end towards its start; unused words up to r + 48 are zeroed.
|
|
|
-; * r A single precision integer. (rcx)
|
|
|
-; * size Maximum number of bytes to convert (rdx; not read by this routine, the limit used for zero filling is the constant 48)
|
|
|
-; * a Byte array. (r8)
|
|
|
-; * n Number of bytes in array to read. (r9)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_from_bin_bswap PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- mov r11, r8
|
|
|
- mov r12, rcx
|
|
|
- add r11, r9 ; r11 = a + n, one past the last (least significant) byte
|
|
|
- add r12, 48 ; r12 = r + 48, end of the 6-word result
|
|
|
- xor r13, r13 ; r13 = 0, used as the zero constant below
|
|
|
- jmp L_384_from_bin_bswap_64_end
|
|
|
-L_384_from_bin_bswap_64_start: ; convert 64 bytes (8 words) per iteration - NOTE(review): stores here are not checked against r + 48
|
|
|
- sub r11, 64
|
|
|
- mov rax, QWORD PTR [r11+56]
|
|
|
- mov r10, QWORD PTR [r11+48]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov rax, QWORD PTR [r11+40]
|
|
|
- mov r10, QWORD PTR [r11+32]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov rax, QWORD PTR [r11+24]
|
|
|
- mov r10, QWORD PTR [r11+16]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov rax, QWORD PTR [r11+8]
|
|
|
- mov r10, QWORD PTR [r11]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- add rcx, 64
|
|
|
- sub r9, 64
|
|
|
-L_384_from_bin_bswap_64_end:
|
|
|
- cmp r9, 63
|
|
|
- jg L_384_from_bin_bswap_64_start ; loop while more than 63 bytes remain (signed compare)
|
|
|
- jmp L_384_from_bin_bswap_8_end
|
|
|
-L_384_from_bin_bswap_8_start: ; convert one 8-byte word per iteration
|
|
|
- sub r11, 8
|
|
|
- mov rax, QWORD PTR [r11]
|
|
|
- bswap rax
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- add rcx, 8
|
|
|
- sub r9, 8
|
|
|
-L_384_from_bin_bswap_8_end:
|
|
|
- cmp r9, 7
|
|
|
- jg L_384_from_bin_bswap_8_start ; loop while more than 7 bytes remain
|
|
|
- cmp r9, r13
|
|
|
- je L_384_from_bin_bswap_hi_end ; no partial word left
|
|
|
- mov r10, r13 ; r10 = 0, accumulates the partial top word
|
|
|
- mov rax, r13 ; rax = 0 so that only al is non-zero after each byte load
|
|
|
-L_384_from_bin_bswap_hi_start: ; the remaining 1..7 bytes are the most significant ones, at the start of a
|
|
|
- mov al, BYTE PTR [r8]
|
|
|
- shl r10, 8
|
|
|
- inc r8
|
|
|
- add r10, rax
|
|
|
- dec r9
|
|
|
- jg L_384_from_bin_bswap_hi_start
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- add rcx, 8
|
|
|
-L_384_from_bin_bswap_hi_end:
|
|
|
- cmp rcx, r12
|
|
|
- jge L_384_from_bin_bswap_zero_end ; result already reaches r + 48
|
|
|
-L_384_from_bin_bswap_zero_start: ; zero the remaining high words up to r + 48
|
|
|
- mov QWORD PTR [rcx], r13
|
|
|
- add rcx, 8
|
|
|
- cmp rcx, r12
|
|
|
- jl L_384_from_bin_bswap_zero_start
|
|
|
-L_384_from_bin_bswap_zero_end:
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_from_bin_bswap ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF NO_MOVBE_SUPPORT
|
|
|
-; /* Read big endian unsigned byte array into r.
|
|
|
-; * Uses the movbe instruction which is an optional instruction.
|
|
|
-; * Words are written least significant first, reading a from its end towards its start; unused words up to r + 48 are zeroed.
|
|
|
-; * r A single precision integer. (rcx)
|
|
|
-; * size Maximum number of bytes to convert (rdx; not read by this routine, the limit used for zero filling is the constant 48)
|
|
|
-; * a Byte array. (r8)
|
|
|
-; * n Number of bytes in array to read. (r9)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_from_bin_movbe PROC
|
|
|
- push r12
|
|
|
- mov r11, r8
|
|
|
- mov r12, rcx
|
|
|
- add r11, r9 ; r11 = a + n, one past the last (least significant) byte
|
|
|
- add r12, 48 ; r12 = r + 48, end of the 6-word result
|
|
|
- jmp L_384_from_bin_movbe_64_end
|
|
|
-L_384_from_bin_movbe_64_start: ; convert 64 bytes (8 words) per iteration - NOTE(review): stores here are not checked against r + 48
|
|
|
- sub r11, 64
|
|
|
- movbe rax, QWORD PTR [r11+56]
|
|
|
- movbe r10, QWORD PTR [r11+48]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- movbe rax, QWORD PTR [r11+40]
|
|
|
- movbe r10, QWORD PTR [r11+32]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- movbe rax, QWORD PTR [r11+24]
|
|
|
- movbe r10, QWORD PTR [r11+16]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- movbe rax, QWORD PTR [r11+8]
|
|
|
- movbe r10, QWORD PTR [r11]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- add rcx, 64
|
|
|
- sub r9, 64
|
|
|
-L_384_from_bin_movbe_64_end:
|
|
|
- cmp r9, 63
|
|
|
- jg L_384_from_bin_movbe_64_start ; loop while more than 63 bytes remain (signed compare)
|
|
|
- jmp L_384_from_bin_movbe_8_end
|
|
|
-L_384_from_bin_movbe_8_start: ; convert one 8-byte word per iteration
|
|
|
- sub r11, 8
|
|
|
- movbe rax, QWORD PTR [r11]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- add rcx, 8
|
|
|
- sub r9, 8
|
|
|
-L_384_from_bin_movbe_8_end:
|
|
|
- cmp r9, 7
|
|
|
- jg L_384_from_bin_movbe_8_start ; loop while more than 7 bytes remain
|
|
|
- cmp r9, 0
|
|
|
- je L_384_from_bin_movbe_hi_end ; no partial word left
|
|
|
- mov r10, 0 ; r10 accumulates the partial top word
|
|
|
- mov rax, 0 ; rax = 0 so that only al is non-zero after each byte load
|
|
|
-L_384_from_bin_movbe_hi_start: ; the remaining 1..7 bytes are the most significant ones, at the start of a
|
|
|
- mov al, BYTE PTR [r8]
|
|
|
- shl r10, 8
|
|
|
- inc r8
|
|
|
- add r10, rax
|
|
|
- dec r9
|
|
|
- jg L_384_from_bin_movbe_hi_start
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- add rcx, 8
|
|
|
-L_384_from_bin_movbe_hi_end:
|
|
|
- cmp rcx, r12
|
|
|
- jge L_384_from_bin_movbe_zero_end ; result already reaches r + 48
|
|
|
-L_384_from_bin_movbe_zero_start: ; zero the remaining high words up to r + 48
|
|
|
- mov QWORD PTR [rcx], 0
|
|
|
- add rcx, 8
|
|
|
- cmp rcx, r12
|
|
|
- jl L_384_from_bin_movbe_zero_start
|
|
|
-L_384_from_bin_movbe_zero_end:
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_from_bin_movbe ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Write r as big endian to byte array.
|
|
|
-; * Fixed length number of bytes written: 48
|
|
|
-; * Uses the bswap instruction.
|
|
|
-; * The most significant word r[5] is written first, each word byte-reversed.
|
|
|
-; * r A single precision integer. (rcx, 6 words read)
|
|
|
-; * a Byte array. (rdx, 48 bytes written)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_to_bin_bswap_6 PROC
|
|
|
- mov rax, QWORD PTR [rcx+40] ; r[5] and r[4] -> a[0..15]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx], rax
|
|
|
- mov QWORD PTR [rdx+8], r8
|
|
|
- mov rax, QWORD PTR [rcx+24] ; r[3] and r[2] -> a[16..31]
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+16], rax
|
|
|
- mov QWORD PTR [rdx+24], r8
|
|
|
- mov rax, QWORD PTR [rcx+8] ; r[1] and r[0] -> a[32..47]
|
|
|
- mov r8, QWORD PTR [rcx]
|
|
|
- bswap rax
|
|
|
- bswap r8
|
|
|
- mov QWORD PTR [rdx+32], rax
|
|
|
- mov QWORD PTR [rdx+40], r8
|
|
|
- ret
|
|
|
-sp_384_to_bin_bswap_6 ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF NO_MOVBE_SUPPORT
|
|
|
-; /* Write r as big endian to byte array.
|
|
|
-; * Fixed length number of bytes written: 48
|
|
|
-; * Uses the movbe instruction which is optional.
|
|
|
-; * The most significant word r[5] is written first; movbe byte-reverses each word as it is loaded.
|
|
|
-; * r A single precision integer. (rcx, 6 words read)
|
|
|
-; * a Byte array. (rdx, 48 bytes written)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_to_bin_movbe_6 PROC
|
|
|
- movbe rax, QWORD PTR [rcx+40] ; r[5] and r[4] -> a[0..15]
|
|
|
- movbe r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rdx], rax
|
|
|
- mov QWORD PTR [rdx+8], r8
|
|
|
- movbe rax, QWORD PTR [rcx+24] ; r[3] and r[2] -> a[16..31]
|
|
|
- movbe r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rdx+16], rax
|
|
|
- mov QWORD PTR [rdx+24], r8
|
|
|
- movbe rax, QWORD PTR [rcx+8] ; r[1] and r[0] -> a[32..47]
|
|
|
- movbe r8, QWORD PTR [rcx]
|
|
|
- mov QWORD PTR [rdx+32], rax
|
|
|
- mov QWORD PTR [rdx+40], r8
|
|
|
- ret
|
|
|
-sp_384_to_bin_movbe_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Sub b from a into a. (a -= b)
|
|
|
-; * Returns in rax: 0 when the subtraction did not borrow, -1 when it did.
|
|
|
-; * a A single precision integer and result. (rcx, 6 words, updated in memory)
|
|
|
-; * b A single precision integer. (rdx, 6 words)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_sub_in_place_6 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- mov r8, QWORD PTR [rdx] ; load all six words of b before a is modified
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- mov r13, QWORD PTR [rdx+40]
|
|
|
- sub QWORD PTR [rcx], r8 ; word 0 starts the borrow chain
|
|
|
- sbb QWORD PTR [rcx+8], r9
|
|
|
- sbb QWORD PTR [rcx+16], r10
|
|
|
- sbb QWORD PTR [rcx+24], r11
|
|
|
- sbb QWORD PTR [rcx+32], r12
|
|
|
- sbb QWORD PTR [rcx+40], r13
|
|
|
- sbb rax, rax ; rax = 0 - CF: 0 without a final borrow, -1 with one
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_sub_in_place_6 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; * r10, r11 and r12 rotate as a three-word accumulator; one result word is stored per partial product.
|
|
|
-; * r A single precision integer. (rcx, 7 words written: r[0]..r[6])
|
|
|
-; * a A single precision integer. (rdx, 6 words)
|
|
|
-; * b A single precision digit. (r8)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_mul_d_6 PROC
|
|
|
- push r12
|
|
|
- mov r9, rdx ; r9 = a; rdx is overwritten by every mul
|
|
|
- ; A[0] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9] ; rdx:rax = a[0] * b
|
|
|
- mov r10, rax
|
|
|
- mov r11, rdx
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- ; A[1] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax ; low half completes the current result word
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- adc r12, rdx ; high half plus carry goes to the next word
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B
|
|
|
- mov rax, r8
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx ; r10 becomes the top (seventh) result word
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_mul_d_6 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; * Uses mulx with two independent carry chains: adcx (CF) adds the low halves, adox (OF) adds the high halves.
|
|
|
-; * r A single precision integer. (rcx, 7 words written: r[0]..r[6])
|
|
|
-; * a A single precision integer. (rdx, 6 words)
|
|
|
-; * b A single precision digit. (r8)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_mul_d_avx2_6 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- mov rax, rdx ; rax = a; rdx is needed as the implicit mulx source operand
|
|
|
- ; A[0] * B
|
|
|
- mov rdx, r8 ; rdx = b for every mulx below
|
|
|
- xor r13, r13 ; r13 = 0; also clears CF and OF before the adcx/adox chains
|
|
|
- mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b
|
|
|
- mov QWORD PTR [rcx], r11
|
|
|
- ; A[1] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+8]
|
|
|
- mov r11, r13 ; next accumulator word starts at 0
|
|
|
- adcx r12, r9 ; low half into the current word (CF chain)
|
|
|
- adox r11, r10 ; high half into the next word (OF chain)
|
|
|
- mov QWORD PTR [rcx+8], r12
|
|
|
- ; A[2] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- ; A[3] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- ; A[4] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- ; A[5] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- adcx r11, r13 ; fold the last CF into the top word
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_mul_d_avx2_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF _WIN64
|
|
|
-; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
|
|
|
-; * The div instruction faults when div is 0 or the quotient does not fit in 64 bits, so the caller must ensure d1 < div.
|
|
|
-; * d1 The high order half of the number to divide. (rcx)
|
|
|
-; * d0 The low order half of the number to divide. (rdx)
|
|
|
-; * div The divisor. (r8)
|
|
|
-; * returns the result of the division. (quotient in rax)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-div_384_word_asm_6 PROC
|
|
|
- mov r9, rdx ; d0 is moved out of rdx (through r9) ...
|
|
|
- mov rax, r9 ; ... into rax, the low half of the div dividend
|
|
|
- mov rdx, rcx ; rdx = d1, the high half of the div dividend
|
|
|
- div r8 ; rax = (rdx:rax) / r8
|
|
|
- ret
|
|
|
-div_384_word_asm_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Shift number right by 1 bit. (r = a >> 1)
|
|
|
-; * All six words are loaded before any store; the top bit of the result is 0.
|
|
|
-; * r Result of right shift by 1. (rcx, 6 words written)
|
|
|
-; * a Number to shift. (rdx, 6 words read)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_rshift1_6 PROC
|
|
|
- push r12
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- shrd rax, r8, 1 ; each word takes its new top bit from the low bit of the next higher word
|
|
|
- shrd r8, r9, 1
|
|
|
- shrd r9, r10, 1
|
|
|
- shrd r10, r11, 1
|
|
|
- shrd r11, r12, 1
|
|
|
- shr r12, 1 ; top word: a zero is shifted in
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_rshift1_6 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
|
|
|
-; * When a is odd, m is added first; the sum (with its carry) is then shifted right by one bit. The add is skipped with a branch on the low bit of a.
|
|
|
-; * r Result of division by 2. (rcx, 6 words written)
|
|
|
-; * a Number to divide. (rdx, 6 words)
|
|
|
-; * m Modulus (r8, 6 words)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_div2_mod_6 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov rax, QWORD PTR [rdx] ; a[0..5] -> rax, r9, r10, r11, r12, r13
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- mov r13, QWORD PTR [rdx+40]
|
|
|
- mov r14, QWORD PTR [r8] ; m[0..5] -> r14, r15, rdi, rsi, rbx, rbp
|
|
|
- mov r15, QWORD PTR [r8+8]
|
|
|
- mov rdi, QWORD PTR [r8+16]
|
|
|
- mov rsi, QWORD PTR [r8+24]
|
|
|
- mov rbx, QWORD PTR [r8+32]
|
|
|
- mov rbp, QWORD PTR [r8+40]
|
|
|
- mov r8, rax
|
|
|
- and r8, 1 ; r8 = low bit of a; when a is even this leaves r8 = 0 for the final shrd
|
|
|
- je L_384_mod_inv_6_div2_mod_no_add ; a is even: no addition needed
|
|
|
- add rax, r14 ; a is odd: a += m
|
|
|
- adc r9, r15
|
|
|
- adc r10, rdi
|
|
|
- adc r11, rsi
|
|
|
- adc r12, rbx
|
|
|
- adc r13, rbp
|
|
|
- mov r8, 0 ; mov leaves CF from the addition untouched
|
|
|
- adc r8, 0 ; r8 = carry out of the 6-word sum
|
|
|
-L_384_mod_inv_6_div2_mod_no_add:
|
|
|
- shrd rax, r9, 1 ; shift the sum right by one, lowest word first
|
|
|
- shrd r9, r10, 1
|
|
|
- shrd r10, r11, 1
|
|
|
- shrd r11, r12, 1
|
|
|
- shrd r12, r13, 1
|
|
|
- shrd r13, r8, 1 ; the carry (or 0) becomes the top bit of the result
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov QWORD PTR [rcx+32], r12
|
|
|
- mov QWORD PTR [rcx+40], r13
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_384_div2_mod_6 ENDP
|
|
|
-_text ENDS
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_384_num_bits_6 PROC ; Returns in rax the number of significant bits of the 6-word number at rcx (0 when every word is zero).
|
|
|
- xor rax, rax ; result is 0 if no word below is non-zero
|
|
|
- mov rdx, QWORD PTR [rcx+40] ; scan from the most significant word down
|
|
|
- cmp rdx, 0
|
|
|
- je L_384_num_bits_6_end_320
|
|
|
- mov rax, -1
|
|
|
- bsr rax, rdx ; rax = index of the highest set bit (rdx is non-zero here)
|
|
|
- add rax, 321 ; 320 bits in the five lower words + bit index + 1
|
|
|
- jmp L_384_num_bits_6_done
|
|
|
-L_384_num_bits_6_end_320:
|
|
|
- mov rdx, QWORD PTR [rcx+32]
|
|
|
- cmp rdx, 0
|
|
|
- je L_384_num_bits_6_end_256
|
|
|
- mov rax, -1
|
|
|
- bsr rax, rdx
|
|
|
- add rax, 257 ; 256 + bit index + 1
|
|
|
- jmp L_384_num_bits_6_done
|
|
|
-L_384_num_bits_6_end_256:
|
|
|
- mov rdx, QWORD PTR [rcx+24]
|
|
|
- cmp rdx, 0
|
|
|
- je L_384_num_bits_6_end_192
|
|
|
- mov rax, -1
|
|
|
- bsr rax, rdx
|
|
|
- add rax, 193 ; 192 + bit index + 1
|
|
|
- jmp L_384_num_bits_6_done
|
|
|
-L_384_num_bits_6_end_192:
|
|
|
- mov rdx, QWORD PTR [rcx+16]
|
|
|
- cmp rdx, 0
|
|
|
- je L_384_num_bits_6_end_128
|
|
|
- mov rax, -1
|
|
|
- bsr rax, rdx
|
|
|
- add rax, 129 ; 128 + bit index + 1
|
|
|
- jmp L_384_num_bits_6_done
|
|
|
-L_384_num_bits_6_end_128:
|
|
|
- mov rdx, QWORD PTR [rcx+8]
|
|
|
- cmp rdx, 0
|
|
|
- je L_384_num_bits_6_end_64
|
|
|
- mov rax, -1
|
|
|
- bsr rax, rdx
|
|
|
- add rax, 65 ; 64 + bit index + 1
|
|
|
- jmp L_384_num_bits_6_done
|
|
|
-L_384_num_bits_6_end_64:
|
|
|
- mov rdx, QWORD PTR [rcx]
|
|
|
- cmp rdx, 0
|
|
|
- je L_384_num_bits_6_end_0 ; all six words are zero: rax still holds 0
|
|
|
- mov rax, -1
|
|
|
- bsr rax, rdx
|
|
|
- add rax, 1 ; bit index + 1
|
|
|
- jmp L_384_num_bits_6_done
|
|
|
-L_384_num_bits_6_end_0:
|
|
|
-L_384_num_bits_6_done:
|
|
|
- ret
|
|
|
-sp_384_num_bits_6 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF WOLFSSL_SP_521
|
|
|
-; /* Multiply a and b into r. (r = a * b) Operands are 9 64-bit words each; the product is 18 words.
|
|
|
-; * Column-by-column product: every A[i] * B[j] with i + j = k is summed into a three-register accumulator before result word k is stored.
|
|
|
-; * r A single precision integer. Destination, addressed through rcx; offsets 0..136 are written.
|
|
|
-; * a A single precision integer. Arrives in rdx and is kept in r9; offsets 0..64 are read.
|
|
|
-; * b A single precision integer. Addressed through r8; offsets 0..64 are read.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mul_9 PROC
|
|
|
- push r12 ; r12 serves as one of the three accumulator words; restored before ret
|
|
|
- mov r9, rdx ; r9 = a, because every mul below overwrites rdx
|
|
|
- sub rsp, 72 ; 72-byte stack scratch that stages result words 0..8
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- mov QWORD PTR [rsp], rax ; staged r[0]
|
|
|
- mov r11, rdx ; high half seeds the accumulator of column 1
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10 ; fresh top accumulator word for this column
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+8], r11 ; staged r[1]
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+16], r12 ; staged r[2]
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+24], r10 ; staged r[3]
|
|
|
- ; A[0] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+32], r11 ; staged r[4]
|
|
|
- ; A[0] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+40], r12 ; staged r[5]
|
|
|
- ; A[0] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+48], r10 ; staged r[6]
|
|
|
- ; A[0] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+56], r11 ; staged r[7]
|
|
|
- ; A[0] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+64], r12 ; staged r[8], the last word kept on the stack
|
|
|
- ; A[1] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+72], r10 ; r[9]: words 9..17 are stored directly; operands are only read at offsets 0..64 (presumably so r may alias a or b)
|
|
|
- ; A[2] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+80], r11 ; r[10]
|
|
|
- ; A[3] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+88], r12 ; r[11]
|
|
|
- ; A[4] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+96], r10 ; r[12]
|
|
|
- ; A[5] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+104], r11 ; r[13]
|
|
|
- ; A[6] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+112], r12 ; r[14]
|
|
|
- ; A[7] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+120], r10 ; r[15]
|
|
|
- ; A[8] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx ; final column: only two accumulator words remain
|
|
|
- mov QWORD PTR [rcx+128], r11 ; r[16]
|
|
|
- mov QWORD PTR [rcx+136], r12 ; r[17]
|
|
|
- mov rax, QWORD PTR [rsp] ; all operand reads are done: copy staged words 0..8 from the stack into r
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r10, QWORD PTR [rsp+48]
|
|
|
- mov r11, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- add rsp, 72 ; release the scratch area
|
|
|
- pop r12 ; restore the caller's r12
|
|
|
- ret
|
|
|
-sp_521_mul_9 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r. (r = a * b) MULX/ADCX/ADOX variant: 9 64-bit words per operand, 18-word product.
|
|
|
-; * Row-by-row product: for each word A[i] (held in rdx), A[i] * B[0..8] is added into result words i..i+9 using two independent carry chains (CF via adcx, OF via adox).
|
|
|
-; * r Result of multiplication. Arrives in rcx; offsets 0..136 are written. Words 0..8 go through stack scratch when r is the same pointer as a or b.
|
|
|
-; * a First number to multiply. Arrives in rdx and is kept in r9; offsets 0..64 are read.
|
|
|
-; * b Second number to multiply. Arrives in r8 and is kept in rbp; offsets 0..64 are read.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mul_avx2_9 PROC
|
|
|
- push rbx ; rbx, rbp and r12-r15 are all used below and restored before ret
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- mov rbp, r8 ; rbp = b
|
|
|
- mov r8, rcx ; r8 = r (rcx becomes the mulx high-half scratch)
|
|
|
- mov r9, rdx ; r9 = a (rdx becomes the implicit mulx multiplicand)
|
|
|
- sub rsp, 72 ; 72-byte stack scratch for result words 0..8
|
|
|
- cmp r9, r8 ; is a the same pointer as r?
|
|
|
- mov rbx, rsp
|
|
|
- cmovne rbx, r8 ; a != r: low words may be written straight into r
|
|
|
- cmp rbp, r8 ; is b the same pointer as r?
|
|
|
- cmove rbx, rsp ; b == r: low words must go to the stack scratch instead
|
|
|
- add r8, 72 ; r8 now addresses result word 9 (upper half of r)
|
|
|
- xor r15, r15 ; r15 stays zero; xor also clears CF and OF ahead of the adcx/adox chains
|
|
|
- mov rdx, QWORD PTR [r9] ; rdx = A[0]
|
|
|
- ; A[0] * B[0]
|
|
|
- mulx r11, r10, QWORD PTR [rbp]
|
|
|
- ; A[0] * B[1]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx], r10 ; result word 0 is final after this store
|
|
|
- adcx r11, rax
|
|
|
- ; A[0] * B[2]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- adcx r12, rax
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- ; A[0] * B[3]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r13, rax
|
|
|
- ; A[0] * B[4]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r10, rax
|
|
|
- ; A[0] * B[5]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+32], r10
|
|
|
- adcx r11, rax
|
|
|
- mov QWORD PTR [rbx+40], r11
|
|
|
- ; A[0] * B[6]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r12, rax
|
|
|
- ; A[0] * B[7]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+48], r12
|
|
|
- adcx r13, rax
|
|
|
- ; A[0] * B[8]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, r15 ; top word of row 0 plus the last carry (r15 is zero)
|
|
|
- mov r14, r15
|
|
|
- adcx r14, r15 ; r14 = carry word handed to the next row
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov rdx, QWORD PTR [r9+8] ; rdx = A[1]
|
|
|
- mov r11, QWORD PTR [rbx+8] ; reload the partial sums this row adds into
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r10, QWORD PTR [rbx+32]
|
|
|
- ; A[1] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax ; low halves ride the CF chain
|
|
|
- adox r12, rcx ; high halves ride the OF chain, one word higher
|
|
|
- ; A[1] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[1] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- mov r11, QWORD PTR [rbx+40]
|
|
|
- mov r12, QWORD PTR [rbx+48]
|
|
|
- mov r13, QWORD PTR [rbx+56]
|
|
|
- ; A[1] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [rbx+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+48], r12
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[1] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[1] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- mov r12, r15 ; new top word of this row starts at zero
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r14 ; fold in the carry word left by the previous row
|
|
|
- mov r14, r15
|
|
|
- adox r14, r15 ; collect the remaining OF ...
|
|
|
- adcx r14, r15 ; ... and CF into r14 for the next row (same pattern closes rows 2..7)
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov rdx, QWORD PTR [r9+16] ; rdx = A[2]
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r10, QWORD PTR [rbx+32]
|
|
|
- mov r11, QWORD PTR [rbx+40]
|
|
|
- ; A[2] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[2] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+32], r10
|
|
|
- mov r12, QWORD PTR [rbx+48]
|
|
|
- mov r13, QWORD PTR [rbx+56]
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- ; A[2] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [rbx+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[2] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[2] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, r15
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r14
|
|
|
- mov r14, r15
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov rdx, QWORD PTR [r9+24] ; rdx = A[3]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r10, QWORD PTR [rbx+32]
|
|
|
- mov r11, QWORD PTR [rbx+40]
|
|
|
- mov r12, QWORD PTR [rbx+48]
|
|
|
- ; A[3] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+40], r11
|
|
|
- mov r13, QWORD PTR [rbx+56]
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[3] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [rbx+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[3] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[3] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r10, r15
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r15
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov rdx, QWORD PTR [r9+32] ; rdx = A[4]
|
|
|
- mov r10, QWORD PTR [rbx+32]
|
|
|
- mov r11, QWORD PTR [rbx+40]
|
|
|
- mov r12, QWORD PTR [rbx+48]
|
|
|
- mov r13, QWORD PTR [rbx+56]
|
|
|
- ; A[4] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[4] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+48], r12
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[4] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[4] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[4] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r11, r15
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r14
|
|
|
- mov r14, r15
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov rdx, QWORD PTR [r9+40] ; rdx = A[5]
|
|
|
- mov r11, QWORD PTR [rbx+40]
|
|
|
- mov r12, QWORD PTR [rbx+48]
|
|
|
- mov r13, QWORD PTR [rbx+56]
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- ; A[5] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[5] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[5] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[5] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r12, r15
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r14
|
|
|
- mov r14, r15
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov rdx, QWORD PTR [r9+48] ; rdx = A[6]
|
|
|
- mov r12, QWORD PTR [rbx+48]
|
|
|
- mov r13, QWORD PTR [rbx+56]
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[6] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[6] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[6] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[6] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[6] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r13, r15
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r14
|
|
|
- mov r14, r15
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov QWORD PTR [r8+48], r13
|
|
|
- mov rdx, QWORD PTR [r9+56] ; rdx = A[7]
|
|
|
- mov r13, QWORD PTR [rbx+56]
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[7] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+56], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[7] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[7] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- mov r13, QWORD PTR [r8+48]
|
|
|
- ; A[7] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov r10, r15
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r15
|
|
|
- adox r14, r15
|
|
|
- adcx r14, r15
|
|
|
- mov QWORD PTR [r8+48], r13
|
|
|
- mov QWORD PTR [r8+56], r10
|
|
|
- mov rdx, QWORD PTR [r9+64] ; rdx = A[8], the last row
|
|
|
- mov r10, QWORD PTR [rbx+64]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[8] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+64], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[8] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r13, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [r8+56]
|
|
|
- ; A[8] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[8] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- mov QWORD PTR [r8+48], r13
|
|
|
- mov r11, r15
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r14 ; last row: no further carry word is collected into r14
|
|
|
- mov QWORD PTR [r8+56], r10
|
|
|
- mov QWORD PTR [r8+64], r11 ; result word 17
|
|
|
- sub r8, 72 ; r8 = r again
|
|
|
- cmp r9, r8
|
|
|
- je L_start_521_mul_avx2_9 ; a == r: low words sit in the stack scratch and must be copied out
|
|
|
- cmp rbp, r8
|
|
|
- jne L_end_521_mul_avx2_9 ; neither operand is r: low words were written in place, skip the copy
|
|
|
-L_start_521_mul_avx2_9:
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx] ; copy result words 0..8 (72 bytes) from the scratch at rbx into r
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+32]
|
|
|
- vmovups OWORD PTR [r8+32], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+48]
|
|
|
- vmovups OWORD PTR [r8+48], xmm0
|
|
|
- mov rax, QWORD PTR [rbx+64] ; ninth word does not fill a 16-byte move
|
|
|
- mov QWORD PTR [r8+64], rax
|
|
|
-L_end_521_mul_avx2_9:
|
|
|
- add rsp, 72 ; release the scratch area
|
|
|
- pop r15 ; restore saved registers in reverse push order
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- ret
|
|
|
-sp_521_mul_avx2_9 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; * Register use as read from the code below: rcx = r, rdx = a (copied to r8 because every mul overwrites rdx). Product-scanning (column by column) square; cross products A[i]*A[j], i != j, are counted twice.
|
|
|
-; * r A single precision integer. Receives 18 64-bit words ([rcx] .. [rcx+136]).
|
|
|
-; * a A single precision integer. 9 64-bit words are read ([r8] .. [r8+64]).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_sqr_9 PROC
|
|
|
- push r12 ; r12, r13, r14 are used as scratch below and popped before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- mov r8, rdx ; r8 = a (rdx is overwritten by every mul)
|
|
|
- sub rsp, 72 ; 9-word stack buffer: result words 0..8 are staged here and copied to r at the end
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax
|
|
|
- xor r11, r11
|
|
|
- mov QWORD PTR [rsp], rax ; result word 0 -> stack buffer
|
|
|
- mov r10, rdx
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax ; same product added a second time (cross product counts twice in a square)
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+8], r10 ; result word 1 -> stack buffer
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r11, rax ; square term is added once only
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+16], r11 ; result word 2 -> stack buffer
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+24], r9 ; result word 3 -> stack buffer
|
|
|
- ; A[0] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+32], r10 ; result word 4 -> stack buffer
|
|
|
- ; A[0] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax ; from this column on, cross products are first summed in r14:r13:r12 ...
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12 ; ... then the sum is doubled once ...
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12 ; ... and added into the running column accumulator
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+40], r11 ; result word 5 -> stack buffer
|
|
|
- ; A[0] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax
|
|
|
- add r12, r12 ; double the cross-product sum (rdx:rax still hold the square)
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax ; add the square term once, after the doubling
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+48], r9 ; result word 6 -> stack buffer
|
|
|
- ; A[0] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+56], r10 ; result word 7 -> stack buffer
|
|
|
- ; A[0] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+64], r11 ; result word 8 -> stack buffer (last staged word)
|
|
|
- ; A[1] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[2] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+72], r9 ; result word 9: words 9..17 are written straight to r
|
|
|
- ; A[2] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[3] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+80], r10 ; result word 10
|
|
|
- ; A[3] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[4] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+88], r11 ; result word 11
|
|
|
- ; A[4] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul rax
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+96], r9 ; result word 12
|
|
|
- ; A[5] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[6] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rcx+104], r10 ; result word 13
|
|
|
- ; A[6] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+112], r11 ; result word 14
|
|
|
- ; A[7] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+120], r9 ; result word 15
|
|
|
- ; A[8] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64] ; last read of a
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- mov QWORD PTR [rcx+128], r10 ; result word 16
|
|
|
- mov QWORD PTR [rcx+136], r11 ; result word 17
|
|
|
- mov rax, QWORD PTR [rsp] ; copy staged result words 0..8 from the stack buffer to r
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r12, QWORD PTR [rsp+16]
|
|
|
- mov r13, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], r13
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r12, QWORD PTR [rsp+48]
|
|
|
- mov r13, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r12
|
|
|
- mov QWORD PTR [rcx+56], r13
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- add rsp, 72 ; release the stack buffer
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_sqr_9 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; * Register use as read from the code below: rcx = r (copied to r8), rdx = a (copied to r9; rdx is then reused as the implicit mulx multiplicand). Cross products are accumulated first, then doubled and combined with the squares A[i] x A[i].
|
|
|
-; * r A single precision integer. Receives 18 64-bit words.
|
|
|
-; * a A single precision integer. 9 64-bit words are read. The code tests for r == a and, in that case, stages result words 0..3 on the stack.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_sqr_avx2_9 PROC
|
|
|
- push rbp ; rbp, r12-r15, rdi, rsi, rbx are all used below and popped before ret
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov r8, rcx ; r8 = r
|
|
|
- mov r9, rdx ; r9 = a
|
|
|
- sub rsp, 72 ; stack scratch (only offsets 0..24 are accessed, via rbp, when r == a)
|
|
|
- cmp r9, r8 ; does r alias a?
|
|
|
- mov rbp, rsp ; rbp = stack scratch ...
|
|
|
- cmovne rbp, r8 ; ... unless r != a, then rbp = r and result words 0..3 are written directly
|
|
|
- add r8, 72 ; r8 = &r[9]; [r8+k] below addresses result word 9 + k/8
|
|
|
- xor r12, r12 ; r12 = 0 (zero operand for adcx/adox); also clears CF and OF for the carry chains
|
|
|
- ; Diagonal 1 (NOTE(review): the %rN names in the 'Zero into' / 'No load' / 'No store' comments do not match the registers used in this listing; presumably left over from the generator's other register mapping - confirm)
|
|
|
- ; Zero into %r9
|
|
|
- ; A[1] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx r11, r10, QWORD PTR [r9+8]
|
|
|
- mov QWORD PTR [rbp+8], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[2] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [rbp+16], r11
|
|
|
- ; No load %r12 - %r9
|
|
|
- ; A[3] x A[0]
|
|
|
- mulx r14, rax, QWORD PTR [r9+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r14, r12
|
|
|
- mov QWORD PTR [rbp+24], r10
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[4] x A[0]
|
|
|
- mulx r15, rax, QWORD PTR [r9+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, r12
|
|
|
- ; No store %r12 - %r9
|
|
|
- ; No load %r14 - %r9
|
|
|
- ; A[5] x A[0]
|
|
|
- mulx rdi, rax, QWORD PTR [r9+40]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, r12
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[6] x A[0]
|
|
|
- mulx rsi, rax, QWORD PTR [r9+48]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, r12
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No load %rbx - %r9
|
|
|
- ; A[7] x A[0]
|
|
|
- mulx rbx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, r12
|
|
|
- ; No store %r15 - %r8
|
|
|
- ; Zero into %r8
|
|
|
- ; A[8] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+64]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, r12
|
|
|
- ; No store %rbx - %r9
|
|
|
- ; Zero into %r9
|
|
|
- ; A[8] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx r11, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- ; Carry
|
|
|
- adcx r11, r12
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- ; Diagonal 2
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- ; No load %r12 - %r8
|
|
|
- ; A[2] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[3] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; No store %r12 - %r8
|
|
|
- ; No load %r14 - %r8
|
|
|
- ; A[4] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[5] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No load %rbx - %r8
|
|
|
- ; A[6] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[7] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[7] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[7] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx r11, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[7] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx r10, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- ; Diagonal 3
|
|
|
- ; No load %r14 - %r9
|
|
|
- ; A[3] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[4] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No load %rbx - %r9
|
|
|
- ; A[5] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- ; A[6] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[6] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- ; A[6] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[6] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[8] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx r10, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[8] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx r11, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- ; Carry
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- ; Diagonal 4
|
|
|
- ; No load %rbx - %r8
|
|
|
- ; A[4] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[5] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[5] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- ; A[8] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[8] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[7] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- ; A[7] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[8] x A[6]
|
|
|
- mulx r11, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[8] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx r10, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [r8+48], r11
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [r8+56], r10
|
|
|
- mov QWORD PTR [r8+64], r13
|
|
|
- ; Double and Add in A[i] x A[i] (adox doubles each word of the cross-product sum, adcx adds the square's low/high halves)
|
|
|
- mov r11, QWORD PTR [rbp+8]
|
|
|
- ; A[0] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- mov QWORD PTR [rbp], rax ; result word 0 (no cross products contribute to it)
|
|
|
- adox r11, r11
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+8], r11
|
|
|
- mov r10, QWORD PTR [rbp+16]
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- ; A[1] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+16], r10
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- ; A[2] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r14, r14
|
|
|
- adox r15, r15
|
|
|
- adcx r14, rax
|
|
|
- adcx r15, rcx
|
|
|
- ; A[3] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox rdi, rdi
|
|
|
- adox rsi, rsi
|
|
|
- adcx rdi, rax
|
|
|
- adcx rsi, rcx
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[4] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox rbx, rbx
|
|
|
- adox r11, r11
|
|
|
- adcx rbx, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- ; A[5] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; A[6] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- ; A[7] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- mov QWORD PTR [r8+48], r11
|
|
|
- mov r10, QWORD PTR [r8+56]
|
|
|
- mov r11, QWORD PTR [r8+64]
|
|
|
- ; A[8] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64] ; last read of a
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+56], r10
|
|
|
- mov QWORD PTR [r8+64], r11
|
|
|
- mov QWORD PTR [r8+-40], r14 ; result words 4..8 were kept in r14, r15, rdi, rsi, rbx; store them now that a is no longer read
|
|
|
- mov QWORD PTR [r8+-32], r15
|
|
|
- mov QWORD PTR [r8+-24], rdi
|
|
|
- mov QWORD PTR [r8+-16], rsi
|
|
|
- mov QWORD PTR [r8+-8], rbx
|
|
|
- sub r8, 72 ; r8 = r again
|
|
|
- cmp r9, r8
|
|
|
- jne L_end_521_sqr_avx2_9 ; r != a: words 0..3 were written to r directly, nothing to copy
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp] ; r == a: copy staged result words 0..3 (32 bytes) from the stack to r
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
-L_end_521_sqr_avx2_9:
|
|
|
- add rsp, 72 ; release the stack scratch
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- ret
|
|
|
-sp_521_sqr_avx2_9 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Add b to a into r. (r = a + b)
|
|
|
-; * Register use as read from the code below: rcx = r, rdx = a, r8 = b. 9 64-bit words are added with a rippling carry; rax returns the carry out of the top word (0 or 1).
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_add_9 PROC
|
|
|
- ; Add
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- xor rax, rax ; rax = 0 for the final carry; done before the add because xor itself clears CF
|
|
|
- add r9, QWORD PTR [r8] ; word 0 starts the carry chain; the interleaved mov loads/stores leave CF untouched
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [rcx], r9
|
|
|
- adc r10, QWORD PTR [r8+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc rax, 0 ; rax = carry out of word 8
|
|
|
- ret
|
|
|
-sp_521_add_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into r. (r = a - b)
|
|
|
-; * Register use as read from the code below: rcx = r, rdx = a, r8 = b. 9 64-bit words are subtracted with a rippling borrow; rax returns 0 when there is no borrow out of the top word and -1 (all ones) when there is.
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_sub_9 PROC
|
|
|
- mov r9, QWORD PTR [rdx]
|
|
|
- sub r9, QWORD PTR [r8] ; word 0 starts the borrow chain; the interleaved mov loads/stores leave CF untouched
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov QWORD PTR [rcx], r9
|
|
|
- sbb r10, QWORD PTR [r8+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- sbb r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- sbb r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- sbb r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- sbb r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- sbb r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- sbb r9, QWORD PTR [r8+64]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- sbb rax, rax ; rax = 0 - CF: 0 if no final borrow, -1 if the subtraction borrowed
|
|
|
- ret
|
|
|
-sp_521_sub_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Conditionally copy a into r using the mask m.
|
|
|
-; * m is -1 to copy and 0 when not.
|
|
|
-; * Register use as read from the code below: rcx = r, rdx = a, r8 = m. Each of the 9 words is updated as r ^= (r ^ a) & m, with no branch on m.
|
|
|
-; * r A single precision number to copy over.
|
|
|
-; * a A single precision number to copy.
|
|
|
-; * m Mask value to apply.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_cond_copy_9 PROC
|
|
|
- push r12 ; r12 is the fifth scratch register for the first group of words; popped before ret
|
|
|
- mov rax, QWORD PTR [rcx] ; words 0..4: load r
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [rcx+16]
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- mov r12, QWORD PTR [rcx+32]
|
|
|
- xor rax, QWORD PTR [rdx] ; r ^ a
|
|
|
- xor r9, QWORD PTR [rdx+8]
|
|
|
- xor r10, QWORD PTR [rdx+16]
|
|
|
- xor r11, QWORD PTR [rdx+24]
|
|
|
- xor r12, QWORD PTR [rdx+32]
|
|
|
- and rax, r8 ; (r ^ a) & m: zero when m = 0, r ^ a when m = -1
|
|
|
- and r9, r8
|
|
|
- and r10, r8
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- xor QWORD PTR [rcx], rax ; r ^= masked difference: r unchanged for m = 0, r = a for m = -1
|
|
|
- xor QWORD PTR [rcx+8], r9
|
|
|
- xor QWORD PTR [rcx+16], r10
|
|
|
- xor QWORD PTR [rcx+24], r11
|
|
|
- xor QWORD PTR [rcx+32], r12
|
|
|
- mov rax, QWORD PTR [rcx+40] ; words 5..8: same sequence
|
|
|
- mov r9, QWORD PTR [rcx+48]
|
|
|
- mov r10, QWORD PTR [rcx+56]
|
|
|
- mov r11, QWORD PTR [rcx+64]
|
|
|
- xor rax, QWORD PTR [rdx+40]
|
|
|
- xor r9, QWORD PTR [rdx+48]
|
|
|
- xor r10, QWORD PTR [rdx+56]
|
|
|
- xor r11, QWORD PTR [rdx+64]
|
|
|
- and rax, r8
|
|
|
- and r9, r8
|
|
|
- and r10, r8
|
|
|
- and r11, r8
|
|
|
- xor QWORD PTR [rcx+40], rax
|
|
|
- xor QWORD PTR [rcx+48], r9
|
|
|
- xor QWORD PTR [rcx+56], r10
|
|
|
- xor QWORD PTR [rcx+64], r11
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_cond_copy_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Multiply two Montgomery form numbers mod the modulus (prime).
|
|
|
-; * (r = a * b mod m)
|
|
|
-; *
|
|
|
-; * r Result of multiplication.
|
|
|
-; * a First number to multiply in Montgomery form.
|
|
|
-; * b Second number to multiply in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; * mp Montgomery multiplier.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_mul_9 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 144
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r15, r15
|
|
|
- mov QWORD PTR [rsp], rax
|
|
|
- mov r14, rdx
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r13, r13
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- mov QWORD PTR [rsp+8], r14
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r14, r14
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- mov QWORD PTR [rsp+16], r15
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+24], r13
|
|
|
- ; A[0] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r13, r13
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[4] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- mov QWORD PTR [rsp+32], r14
|
|
|
- ; A[0] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r14, r14
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[1] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- mov QWORD PTR [rsp+40], r15
|
|
|
- ; A[0] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[1] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[4] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[5] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[6] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+48], r13
|
|
|
- ; A[0] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r13, r13
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[1] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[2] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[3] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[4] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[5] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[6] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[7] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- mov QWORD PTR [rsp+56], r14
|
|
|
- ; A[0] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r14, r14
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[1] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- mov QWORD PTR [rsp+64], r15
|
|
|
- ; A[1] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- xor r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[3] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[4] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[5] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[6] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[7] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[8] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+72], r13
|
|
|
- ; A[2] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- xor r13, r13
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[3] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[4] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[5] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[6] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[7] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[8] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- mov QWORD PTR [rsp+80], r14
|
|
|
- ; A[3] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- xor r14, r14
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- mov QWORD PTR [rsp+88], r15
|
|
|
- ; A[4] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- xor r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[5] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[6] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[7] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[8] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+96], r13
|
|
|
- ; A[5] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- xor r13, r13
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[6] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[7] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- ; A[8] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- adc r13, 0
|
|
|
- mov QWORD PTR [rsp+104], r14
|
|
|
- ; A[6] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- xor r14, r14
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r15, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- mov QWORD PTR [rsp+112], r15
|
|
|
- ; A[7] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- xor r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[8] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rsp+120], r13
|
|
|
- ; A[8] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r14, rax
|
|
|
- adc r15, rdx
|
|
|
- mov QWORD PTR [rsp+128], r14
|
|
|
- mov QWORD PTR [rsp+136], r15
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov rdx, QWORD PTR [rsp+72]
|
|
|
- mov r13, QWORD PTR [rsp+80]
|
|
|
- mov r12, rax
|
|
|
- and r12, 511
|
|
|
- mov r14, QWORD PTR [rsp+88]
|
|
|
- mov r15, QWORD PTR [rsp+96]
|
|
|
- mov r8, QWORD PTR [rsp+104]
|
|
|
- mov r9, QWORD PTR [rsp+112]
|
|
|
- mov r10, QWORD PTR [rsp+120]
|
|
|
- mov r11, QWORD PTR [rsp+128]
|
|
|
- shrd rax, rdx, 9
|
|
|
- shrd rdx, r13, 9
|
|
|
- shrd r13, r14, 9
|
|
|
- shrd r14, r15, 9
|
|
|
- shrd r15, r8, 9
|
|
|
- shrd r8, r9, 9
|
|
|
- shrd r9, r10, 9
|
|
|
- shrd r10, r11, 9
|
|
|
- shr r11, 9
|
|
|
- add rax, QWORD PTR [rsp]
|
|
|
- adc rdx, QWORD PTR [rsp+8]
|
|
|
- adc r13, QWORD PTR [rsp+16]
|
|
|
- adc r14, QWORD PTR [rsp+24]
|
|
|
- adc r15, QWORD PTR [rsp+32]
|
|
|
- adc r8, QWORD PTR [rsp+40]
|
|
|
- adc r9, QWORD PTR [rsp+48]
|
|
|
- adc r10, QWORD PTR [rsp+56]
|
|
|
- adc r12, r11
|
|
|
- mov r11, r12
|
|
|
- shr r12, 9
|
|
|
- and r11, 511
|
|
|
- add rax, r12
|
|
|
- adc rdx, 0
|
|
|
- adc r13, 0
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- adc r8, 0
|
|
|
- adc r9, 0
|
|
|
- adc r10, 0
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r13
|
|
|
- mov QWORD PTR [rcx+24], r14
|
|
|
- mov QWORD PTR [rcx+32], r15
|
|
|
- mov QWORD PTR [rcx+40], r8
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- add rsp, 144
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_mul_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
|
|
|
-; *
|
|
|
-; * r Result of squaring (rcx; 9 words are written).
|
|
|
-; * a Number to square in Montgomery form (rdx; 9 words are read).
|
|
|
-; * m Modulus (prime). Not read here: r8 is overwritten and the fold below is fixed at bit 521.
|
|
|
-; * mp Montgomery multiplier. Not read here: r9 is reused as scratch.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_sqr_9 PROC
|
|
|
- push r12 ; r12-r15 are saved here and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- mov r8, rdx ; r8 = a, because every mul below overwrites rdx
|
|
|
- sub rsp, 144 ; 18 qwords of scratch for the double-width square
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax
|
|
|
- xor r12, r12
|
|
|
- mov QWORD PTR [rsp], rax ; product word 0
|
|
|
- mov r11, rdx
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax ; first of two additions: each cross product counts twice
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax ; second addition of the same cross product
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r12, rax ; square terms are added once
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+16], r12
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+24], r10
|
|
|
- ; A[0] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+32], r11
|
|
|
- ; A[0] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r15, r15
|
|
|
- mov r13, rax ; from here on r15:r14:r13 collects a column's cross products once
|
|
|
- mov r14, rdx
|
|
|
- ; A[1] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- add r13, r13 ; double the collected cross products
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add r12, r13 ; add the column sum to the running accumulator
|
|
|
- adc r10, r14
|
|
|
- adc r11, r15
|
|
|
- mov QWORD PTR [rsp+40], r12
|
|
|
- ; A[0] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r12, r12
|
|
|
- xor r15, r15
|
|
|
- mov r13, rax
|
|
|
- mov r14, rdx
|
|
|
- ; A[1] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax ; rdx:rax = a[3]^2, added once after the doubling below
|
|
|
- add r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- add r10, r13
|
|
|
- adc r11, r14
|
|
|
- adc r12, r15
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- ; A[0] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r15, r15
|
|
|
- mov r13, rax
|
|
|
- mov r14, rdx
|
|
|
- ; A[1] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[3] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- add r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add r11, r13
|
|
|
- adc r12, r14
|
|
|
- adc r10, r15
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- ; A[0] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r15, r15
|
|
|
- mov r13, rax
|
|
|
- mov r14, rdx
|
|
|
- ; A[1] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[2] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[3] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[4] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul rax
|
|
|
- add r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- add r12, r13
|
|
|
- adc r10, r14
|
|
|
- adc r11, r15
|
|
|
- mov QWORD PTR [rsp+64], r12
|
|
|
- ; A[1] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- xor r12, r12
|
|
|
- xor r15, r15
|
|
|
- mov r13, rax
|
|
|
- mov r14, rdx
|
|
|
- ; A[2] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[3] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[4] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- add r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add r10, r13
|
|
|
- adc r11, r14
|
|
|
- adc r12, r15
|
|
|
- mov QWORD PTR [rsp+72], r10
|
|
|
- ; A[2] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r10, r10
|
|
|
- xor r15, r15
|
|
|
- mov r13, rax
|
|
|
- mov r14, rdx
|
|
|
- ; A[3] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[4] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[5] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul rax
|
|
|
- add r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- add r11, r13
|
|
|
- adc r12, r14
|
|
|
- adc r10, r15
|
|
|
- mov QWORD PTR [rsp+80], r11
|
|
|
- ; A[3] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor r11, r11
|
|
|
- xor r15, r15
|
|
|
- mov r13, rax
|
|
|
- mov r14, rdx
|
|
|
- ; A[4] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- ; A[5] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r13, rax
|
|
|
- adc r14, rdx
|
|
|
- adc r15, 0
|
|
|
- add r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add r12, r13
|
|
|
- adc r10, r14
|
|
|
- adc r11, r15
|
|
|
- mov QWORD PTR [rsp+88], r12
|
|
|
- ; A[4] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- ; A[5] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- ; A[6] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul rax
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+112], r12
|
|
|
- ; A[7] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+120], r10
|
|
|
- ; A[8] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- mov QWORD PTR [rsp+128], r11
|
|
|
- mov QWORD PTR [rsp+136], r12 ; word 17; it is not read by the fold below
|
|
|
- mov r10, QWORD PTR [rsp+64] ; fold: load product words 8..16
|
|
|
- mov r11, QWORD PTR [rsp+72]
|
|
|
- mov r12, QWORD PTR [rsp+80]
|
|
|
- mov r9, r10
|
|
|
- and r9, 511 ; r9 = low 9 bits of word 8 (product bits 512..520)
|
|
|
- mov rax, QWORD PTR [rsp+88]
|
|
|
- mov rdx, QWORD PTR [rsp+96]
|
|
|
- mov r13, QWORD PTR [rsp+104]
|
|
|
- mov r14, QWORD PTR [rsp+112]
|
|
|
- mov r15, QWORD PTR [rsp+120]
|
|
|
- mov r8, QWORD PTR [rsp+128] ; r8 (pointer to a) is no longer needed
|
|
|
- shrd r10, r11, 9 ; shift words 8..16 right by 9 bits, i.e. product >> 521
|
|
|
- shrd r11, r12, 9
|
|
|
- shrd r12, rax, 9
|
|
|
- shrd rax, rdx, 9
|
|
|
- shrd rdx, r13, 9
|
|
|
- shrd r13, r14, 9
|
|
|
- shrd r14, r15, 9
|
|
|
- shrd r15, r8, 9
|
|
|
- shr r8, 9
|
|
|
- add r10, QWORD PTR [rsp] ; add product words 0..7 to the shifted high part
|
|
|
- adc r11, QWORD PTR [rsp+8]
|
|
|
- adc r12, QWORD PTR [rsp+16]
|
|
|
- adc rax, QWORD PTR [rsp+24]
|
|
|
- adc rdx, QWORD PTR [rsp+32]
|
|
|
- adc r13, QWORD PTR [rsp+40]
|
|
|
- adc r14, QWORD PTR [rsp+48]
|
|
|
- adc r15, QWORD PTR [rsp+56]
|
|
|
- adc r9, r8 ; top word: bits 512..520 + shifted word 16 + carry
|
|
|
- mov r8, r9
|
|
|
- shr r9, 9 ; r9 = overflow above bit 520
|
|
|
- and r8, 511 ; r8 = top word truncated to 9 bits
|
|
|
- add r10, r9 ; fold the overflow back into word 0 and ripple the carry
|
|
|
- adc r11, 0
|
|
|
- adc r12, 0
|
|
|
- adc rax, 0
|
|
|
- adc rdx, 0
|
|
|
- adc r13, 0
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- adc r8, 0
|
|
|
- mov QWORD PTR [rcx], r10 ; store the 9 result words
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], rax
|
|
|
- mov QWORD PTR [rcx+32], rdx
|
|
|
- mov QWORD PTR [rcx+40], r13
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- mov QWORD PTR [rcx+56], r15
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- add rsp, 144
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_sqr_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Compare a with b in constant time.
|
|
|
-; *
|
|
|
-; * a A single precision integer (rcx; 9 words).
|
|
|
-; * b A single precision integer (rdx; 9 words).
|
|
|
-; * return -ve, 0 or +ve if a is less than, equal to or greater than b
|
|
|
-; * respectively. The values produced in rax are -1, 0 and 1.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_cmp_9 PROC
|
|
|
- push r12 ; r12 is used as scratch and restored before ret
|
|
|
- xor r9, r9 ; r9 = 0, the value used to clear the mask
|
|
|
- mov r8, -1 ; r8 = mask: all ones until the first differing word is seen
|
|
|
- mov rax, -1 ; rax = result; this -1 is cancelled by the final xor when a == b
|
|
|
- mov r10, 1 ; r10 = 1, the "a > b" result
|
|
|
- mov r11, QWORD PTR [rcx+64] ; start at the most significant word
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- and r11, r8 ; once the mask is 0 both operands become 0 and no cmov below fires
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10 ; a word above b word: result = 1
|
|
|
- cmovc rax, r8 ; a word below b word: result = mask, which is still -1 here
|
|
|
- cmovnz r8, r9 ; words differ: clear the mask so lower words are ignored
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+48]
|
|
|
- mov r12, QWORD PTR [rdx+48]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+32]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+8]
|
|
|
- mov r12, QWORD PTR [rdx+8]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx] ; least significant word
|
|
|
- mov r12, QWORD PTR [rdx]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- xor rax, r8 ; all words equal: -1 xor -1 = 0; otherwise the mask is 0 and rax is unchanged
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_cmp_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 to subtract nothing (r = a).
|
|
|
-; *
|
|
|
-; * r A single precision number representing condition subtract result (rcx).
|
|
|
-; * a A single precision number to subtract from (rdx).
|
|
|
-; * b A single precision number to subtract (r8).
|
|
|
-; * m Mask value to apply (r9). On return rax is -1 if the subtraction borrowed, else 0.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_cond_sub_9 PROC
|
|
|
- sub rsp, 72 ; 9 qwords of scratch for the masked copy of b
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- and r10, r9 ; word is kept when m is all ones and zeroed when m is 0
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- and r10, r9
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov r10, QWORD PTR [rdx] ; r = a - (b & m), word by word with borrow
|
|
|
- mov r8, QWORD PTR [rsp] ; r8 (pointer to b) is no longer needed and becomes scratch
|
|
|
- sub r10, r8
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [rsp+8]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx], r10 ; each store is delayed by one word; mov leaves the borrow flag intact
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r8, QWORD PTR [rsp+16]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rsp+24]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov r10, QWORD PTR [rdx+32]
|
|
|
- mov r8, QWORD PTR [rsp+32]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov r11, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rsp+40]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r8, QWORD PTR [rsp+48]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rsp+56]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov r8, QWORD PTR [rsp+64]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov QWORD PTR [rcx+64], r10
|
|
|
- sbb rax, rax ; rax = -1 if the last sbb borrowed, 0 otherwise
|
|
|
- add rsp, 72
|
|
|
- ret
|
|
|
-sp_521_cond_sub_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Reduce the number back to 521 bits using Montgomery reduction. Implemented as a single fold: (a >> 521) is added to the low 521 bits of a.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place (rcx; words 0..16 are read, words 0..8 are written).
|
|
|
-; * m The single precision number representing the modulus. Not read here: rdx is overwritten immediately.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n. Not read here: r8 is overwritten immediately.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_reduce_9 PROC
|
|
|
- push r12 ; r12-r15 are saved here and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- mov rdx, QWORD PTR [rcx+64] ; load words 8..16 of a
|
|
|
- mov rax, QWORD PTR [rcx+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov r15, rdx
|
|
|
- and r15, 511 ; r15 = low 9 bits of word 8 (bits 512..520 of a)
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- mov r10, QWORD PTR [rcx+96]
|
|
|
- mov r11, QWORD PTR [rcx+104]
|
|
|
- mov r12, QWORD PTR [rcx+112]
|
|
|
- mov r13, QWORD PTR [rcx+120]
|
|
|
- mov r14, QWORD PTR [rcx+128]
|
|
|
- shrd rdx, rax, 9 ; shift words 8..16 right by 9 bits, i.e. a >> 521
|
|
|
- shrd rax, r8, 9
|
|
|
- shrd r8, r9, 9
|
|
|
- shrd r9, r10, 9
|
|
|
- shrd r10, r11, 9
|
|
|
- shrd r11, r12, 9
|
|
|
- shrd r12, r13, 9
|
|
|
- shrd r13, r14, 9
|
|
|
- shr r14, 9
|
|
|
- add rdx, QWORD PTR [rcx] ; add words 0..7 of a to the shifted high part
|
|
|
- adc rax, QWORD PTR [rcx+8]
|
|
|
- adc r8, QWORD PTR [rcx+16]
|
|
|
- adc r9, QWORD PTR [rcx+24]
|
|
|
- adc r10, QWORD PTR [rcx+32]
|
|
|
- adc r11, QWORD PTR [rcx+40]
|
|
|
- adc r12, QWORD PTR [rcx+48]
|
|
|
- adc r13, QWORD PTR [rcx+56]
|
|
|
- adc r15, r14 ; top word: bits 512..520 + shifted word 16 + carry
|
|
|
- mov r14, r15
|
|
|
- shr r15, 9 ; r15 = overflow above bit 520
|
|
|
- and r14, 511 ; r14 = top word truncated to 9 bits
|
|
|
- add rdx, r15 ; fold the overflow back into word 0 and ripple the carry
|
|
|
- adc rax, 0
|
|
|
- adc r8, 0
|
|
|
- adc r9, 0
|
|
|
- adc r10, 0
|
|
|
- adc r11, 0
|
|
|
- adc r12, 0
|
|
|
- adc r13, 0
|
|
|
- adc r14, 0
|
|
|
- mov QWORD PTR [rcx], rdx ; write the 9 result words over the start of a
|
|
|
- mov QWORD PTR [rcx+8], rax
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- mov QWORD PTR [rcx+48], r12
|
|
|
- mov QWORD PTR [rcx+56], r13
|
|
|
- mov QWORD PTR [rcx+64], r14
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_reduce_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Reduce the number back to 521 bits using word-by-word Montgomery reduction with the supplied modulus.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place (rcx; words 0..17 are accessed).
|
|
|
-; * m The single precision number representing the modulus (rdx; 9 words).
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n (r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_reduce_order_9 PROC
|
|
|
- push r12 ; r12-r15, rdi and rsi are saved here and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov r9, rdx ; r9 = m, because every mul below overwrites rdx
|
|
|
- xor rsi, rsi ; rsi = carry passed from one iteration to the next
|
|
|
- ; i = 9
|
|
|
- mov r10, 9
|
|
|
- mov r15, QWORD PTR [rcx] ; a[i] and a[i+1] are kept in r15/rdi across iterations
|
|
|
- mov rdi, QWORD PTR [rcx+8]
|
|
|
-L_521_mont_reduce_order_9_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov r13, r15
|
|
|
- imul r13, r8
|
|
|
- cmp r10, 1
|
|
|
- jne L_521_mont_reduce_order_9_nomask
|
|
|
- and r13, 511 ; last iteration only: mu is limited to 9 bits (8*64 + 9 = 521)
|
|
|
-L_521_mont_reduce_order_9_nomask:
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- add r15, rax
|
|
|
- mov QWORD PTR [rcx], r15
|
|
|
- adc r12, rdx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- mov r15, rdi ; becomes a[i] of the next iteration
|
|
|
- add r15, rax
|
|
|
- adc r11, rdx
|
|
|
- add r15, r12
|
|
|
- adc r11, 0
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- mov rdi, QWORD PTR [rcx+16] ; becomes a[i+1] of the next iteration
|
|
|
- add rdi, rax
|
|
|
- adc r12, rdx
|
|
|
- add rdi, r11
|
|
|
- adc r12, 0
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- mov r14, QWORD PTR [rcx+24]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+24], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- mov r14, QWORD PTR [rcx+32]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+32], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- mov r14, QWORD PTR [rcx+40]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+40], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- mov r14, QWORD PTR [rcx+48]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- mov r14, QWORD PTR [rcx+56]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mov rax, r13
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- mov r14, QWORD PTR [rcx+64]
|
|
|
- add r11, rax
|
|
|
- adc rdx, rsi ; add the carry left by the previous iteration
|
|
|
- mov rsi, 0 ; mov, not xor: the carry flag must survive for the adc below
|
|
|
- adc rsi, 0
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+64], r14
|
|
|
- adc QWORD PTR [rcx+72], rdx
|
|
|
- adc rsi, 0
|
|
|
- ; i -= 1
|
|
|
- add rcx, 8
|
|
|
- dec r10
|
|
|
- jnz L_521_mont_reduce_order_9_loop
|
|
|
- mov QWORD PTR [rcx], r15 ; flush the two words still held in registers
|
|
|
- mov QWORD PTR [rcx+8], rdi
|
|
|
- mov r8, rcx
|
|
|
- sub rcx, 72 ; rcx = a again (it advanced 9 * 8 bytes in the loop)
|
|
|
- sub r8, 8 ; r8 = address of a[8]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mov rdx, QWORD PTR [r8+8]
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r13, QWORD PTR [r8+32]
|
|
|
- shrd rax, rdx, 9 ; a[0..9] = a[8..17] >> 9, i.e. the accumulated value >> 521
|
|
|
- shrd rdx, r10, 9
|
|
|
- shrd r10, r11, 9
|
|
|
- shrd r11, r13, 9
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rdx, QWORD PTR [r8+40]
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- shrd r13, rdx, 9
|
|
|
- shrd rdx, r10, 9
|
|
|
- shrd r10, r11, 9
|
|
|
- shrd r11, rax, 9
|
|
|
- mov QWORD PTR [rcx+32], r13
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rdx, QWORD PTR [r8+72]
|
|
|
- shrd rax, rdx, 9
|
|
|
- shr rdx, 9
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], rdx
|
|
|
- mov rsi, QWORD PTR [rcx+64]
|
|
|
- shr rsi, 9 ; bits of the result at or above bit 521
|
|
|
- neg rsi ; subtraction mask: 0 when those bits are clear, -1 when only bit 521 is set
|
|
|
- ; Arguments for sp_521_cond_sub_9(r = a, a = a, b = m, m = mask).
|
|
|
- ; m must be copied out of r9 before r9 receives the mask. The former
|
|
|
- ; non-_WIN64 arm did these two moves in the opposite order and lost m.
|
|
|
- mov r8, r9
|
|
|
- mov r9, rsi
|
|
|
- mov rdx, rcx ; subtract in place
|
|
|
- ; NOTE(review): no 32-byte shadow space is reserved and rsp is 8 mod 16
|
|
|
- ; here (return address + 6 pushes). This relies on sp_521_cond_sub_9
|
|
|
- ; being a leaf that only touches its own 72-byte frame.
|
|
|
- call sp_521_cond_sub_9
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_reduce_order_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Add two Montgomery form numbers (r = a + b % m).
|
|
|
-; *
|
|
|
-; * r Result of addition (rcx).
|
|
|
-; * a First number to add in Montgomery form (rdx).
|
|
|
-; * b Second number to add in Montgomery form (r8).
|
|
|
-; * m Modulus (prime). Not read here: r9 is reused as scratch and the reduction folds at bit 521.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_add_9 PROC
|
|
|
- push r12 ; r12-r15, rdi and rsi are saved here and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov rax, QWORD PTR [rdx] ; load the 9 words of a
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- mov r13, QWORD PTR [rdx+40]
|
|
|
- mov r14, QWORD PTR [rdx+48]
|
|
|
- mov r15, QWORD PTR [rdx+56]
|
|
|
- mov rdi, QWORD PTR [rdx+64]
|
|
|
- add rax, QWORD PTR [r8] ; a + b with carry propagation
|
|
|
- adc r9, QWORD PTR [r8+8]
|
|
|
- adc r10, QWORD PTR [r8+16]
|
|
|
- adc r11, QWORD PTR [r8+24]
|
|
|
- adc r12, QWORD PTR [r8+32]
|
|
|
- adc r13, QWORD PTR [r8+40]
|
|
|
- adc r14, QWORD PTR [r8+48]
|
|
|
- adc r15, QWORD PTR [r8+56]
|
|
|
- adc rdi, QWORD PTR [r8+64]
|
|
|
- mov rsi, rdi
|
|
|
- and rdi, 511 ; keep sum bits 512..520 in the top word
|
|
|
- shr rsi, 9 ; rsi = sum bits above bit 520
|
|
|
- add rax, rsi ; fold them back into word 0 and ripple the carry
|
|
|
- adc r9, 0
|
|
|
- adc r10, 0
|
|
|
- adc r11, 0
|
|
|
- adc r12, 0
|
|
|
- adc r13, 0
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- adc rdi, 0
|
|
|
- mov QWORD PTR [rcx], rax ; store the 9 result words
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov QWORD PTR [rcx+32], r12
|
|
|
- mov QWORD PTR [rcx+40], r13
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- mov QWORD PTR [rcx+56], r15
|
|
|
- mov QWORD PTR [rcx+64], rdi
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_add_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Double a Montgomery form number (r = a + a % m).
|
|
|
-; *
|
|
|
-; * r Result of doubling (rcx).
|
|
|
-; * a Number to double in Montgomery form (rdx).
|
|
|
-; * m Modulus (prime). Not read here: r8 is reused as scratch and the reduction folds at bit 521.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_dbl_9 PROC
|
|
|
- push r12 ; r12-r15 and rdi are saved here and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- mov rax, QWORD PTR [rdx] ; load the 9 words of a
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- mov r13, QWORD PTR [rdx+48]
|
|
|
- mov r14, QWORD PTR [rdx+56]
|
|
|
- mov r15, QWORD PTR [rdx+64]
|
|
|
- add rax, rax ; a + a with carry propagation
|
|
|
- adc r8, r8
|
|
|
- adc r9, r9
|
|
|
- adc r10, r10
|
|
|
- adc r11, r11
|
|
|
- adc r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- mov rdi, r15
|
|
|
- and r15, 511 ; keep sum bits 512..520 in the top word
|
|
|
- shr rdi, 9 ; rdi = sum bits above bit 520
|
|
|
- add rax, rdi ; fold them back into word 0 and ripple the carry
|
|
|
- adc r8, 0
|
|
|
- adc r9, 0
|
|
|
- adc r10, 0
|
|
|
- adc r11, 0
|
|
|
- adc r12, 0
|
|
|
- adc r13, 0
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rcx], rax ; store the 9 result words
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- mov QWORD PTR [rcx+48], r13
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- mov QWORD PTR [rcx+64], r15
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_dbl_9 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Triple a Montgomery form number (r = a + a + a % m).
|
|
|
-; *
|
|
|
-; * r Result of Tripling (rcx).
|
|
|
-; * a Number to triple in Montgomery form (rdx).
|
|
|
-; * m Modulus (prime). Not read here: r8 is reused as scratch and the reduction folds at bit 521.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_tpl_9 PROC
|
|
|
- push r12 ; r12-r15 and rdi are saved here and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- mov rax, QWORD PTR [rdx] ; load the 9 words of a
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- mov r13, QWORD PTR [rdx+48]
|
|
|
- mov r14, QWORD PTR [rdx+56]
|
|
|
- mov r15, QWORD PTR [rdx+64]
|
|
|
- add rax, rax ; a + a with carry propagation
|
|
|
- adc r8, r8
|
|
|
- adc r9, r9
|
|
|
- adc r10, r10
|
|
|
- adc r11, r11
|
|
|
- adc r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- adc r15, r15
|
|
|
- add rax, QWORD PTR [rdx] ; add a a third time; there is no fold between the two additions
|
|
|
- adc r8, QWORD PTR [rdx+8]
|
|
|
- adc r9, QWORD PTR [rdx+16]
|
|
|
- adc r10, QWORD PTR [rdx+24]
|
|
|
- adc r11, QWORD PTR [rdx+32]
|
|
|
- adc r12, QWORD PTR [rdx+40]
|
|
|
- adc r13, QWORD PTR [rdx+48]
|
|
|
- adc r14, QWORD PTR [rdx+56]
|
|
|
- adc r15, QWORD PTR [rdx+64]
|
|
|
- mov rdi, r15
|
|
|
- and r15, 511 ; keep sum bits 512..520 in the top word
|
|
|
- shr rdi, 9 ; rdi = sum bits above bit 520
|
|
|
- add rax, rdi ; fold them back into word 0 and ripple the carry
|
|
|
- adc r8, 0
|
|
|
- adc r9, 0
|
|
|
- adc r10, 0
|
|
|
- adc r11, 0
|
|
|
- adc r12, 0
|
|
|
- adc r13, 0
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- mov QWORD PTR [rcx], rax ; store the 9 result words
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- mov QWORD PTR [rcx+48], r13
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- mov QWORD PTR [rcx+64], r15
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_tpl_9 ENDP
|
|
|
-_text ENDS
|
|
|
; /* Subtract two Montgomery form numbers (r = (a - b) % m).
; *
; * The nine-limb difference a - b is formed with one borrow chain. The
; * top limb is then reduced to its low 9 bits, and the arithmetic shift
; * of the original top limb by 9 (negated) is subtracted from limb 0:
; * that value is 1 when the difference went negative and 0 otherwise for
; * in-range inputs. The constants 9 and 511 hard-code a 521-bit modulus
; * (presumably 2^521 - 1; confirm against the caller). No register or
; * stack slot holding m is read here.
; *
; * r   Result of subtraction (rcx).
; * a   Number to subtract from, in Montgomery form (rdx).
; * b   Number to subtract, in Montgomery form (r8).
; * m   Modulus (prime) (unused).
; */
_text SEGMENT READONLY PARA
sp_521_mont_sub_9 PROC
        ; Save the callee-saved registers used as limb/scratch storage.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        push    rsi

        ; Load a[0..8] into rax, r9, r10, ..., r15, rdi.
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        mov     r13, QWORD PTR [rdx+40]
        mov     r14, QWORD PTR [rdx+48]
        mov     r15, QWORD PTR [rdx+56]
        mov     rdi, QWORD PTR [rdx+64]

        ; t = a - b, borrow rippling from limb 0 up to limb 8.
        sub     rax, QWORD PTR [r8]
        sbb     r9, QWORD PTR [r8+8]
        sbb     r10, QWORD PTR [r8+16]
        sbb     r11, QWORD PTR [r8+24]
        sbb     r12, QWORD PTR [r8+32]
        sbb     r13, QWORD PTR [r8+40]
        sbb     r14, QWORD PTR [r8+48]
        sbb     r15, QWORD PTR [r8+56]
        sbb     rdi, QWORD PTR [r8+64]

        ; rsi = -(top limb >> 9, arithmetic): the amount to take off limb 0.
        mov     rsi, rdi
        sar     rsi, 9
        neg     rsi
        ; The top limb keeps only its low 9 bits.
        and     rdi, 511

        ; Apply the correction with a second borrow chain.
        sub     rax, rsi
        sbb     r9, 0
        sbb     r10, 0
        sbb     r11, 0
        sbb     r12, 0
        sbb     r13, 0
        sbb     r14, 0
        sbb     r15, 0
        sbb     rdi, 0

        ; Store r[0..8].
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r13
        mov     QWORD PTR [rcx+48], r14
        mov     QWORD PTR [rcx+56], r15
        mov     QWORD PTR [rcx+64], rdi

        pop     rsi
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_sub_9 ENDP
_text ENDS
|
|
|
; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
; *
; * Let odd = a & 1. The routine subtracts odd from a and adds odd << 9 to
; * the top limb (i.e. odd * 2^521), then shifts the nine-limb value right
; * by one bit. For odd a this halves a + 2^521 - 1; for even a it is a
; * plain shift. The constant 9 hard-codes a 521-bit modulus (presumably
; * 2^521 - 1; confirm against the caller). The m argument is never read:
; * r8 is overwritten with limb 1 of a.
; *
; * r   Result of division by 2 (rcx).
; * a   Number to divide (rdx).
; * m   Modulus (prime) (r8, unused).
; */
_text SEGMENT READONLY PARA
sp_521_mont_div2_9 PROC
        ; Save the callee-saved registers used as limb/scratch storage.
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi

        ; Load a[0..8] into rax, r8, r9, ..., r15.
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        mov     r12, QWORD PTR [rdx+40]
        mov     r13, QWORD PTR [rdx+48]
        mov     r14, QWORD PTR [rdx+56]
        mov     r15, QWORD PTR [rdx+64]

        ; rdi = a & 1 (32-bit form zero-extends into the full register).
        mov     edi, eax
        and     edi, 1

        ; a -= odd, with the borrow chain carried through every limb.
        sub     rax, rdi
        sbb     r8, 0
        sbb     r9, 0
        sbb     r10, 0
        sbb     r11, 0
        sbb     r12, 0
        sbb     r13, 0
        sbb     r14, 0
        sbb     r15, 0

        ; a += odd * 2^521 (bit 9 of limb 8).
        shl     rdi, 9
        add     r15, rdi

        ; Shift the whole nine-limb value right by one bit.
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        shrd    r12, r13, 1
        shrd    r13, r14, 1
        shrd    r14, r15, 1
        shr     r15, 1

        ; Store r[0..8].
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r12
        mov     QWORD PTR [rcx+48], r13
        mov     QWORD PTR [rcx+56], r14
        mov     QWORD PTR [rcx+64], r15

        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_mont_div2_9 ENDP
_text ENDS
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
; /* Touch each possible point that could be being copied.
; *
; * Every one of the 32 candidate entries (indices 1..32, 440 bytes apart,
; * entry 0 is skipped) is read and ANDed with an all-ones/all-zeros mask,
; * so the sequence of memory accesses does not depend on idx. The masked
; * values are ORed together, which leaves only the selected entry.
; *
; * Copied byte ranges of an entry: 0..71, 144..215 and 288..359
; * (presumably the nine-limb x, y and z ordinates - confirm against the
; * point structure). Pass 1 gathers 0..71 and 144..175, pass 2 gathers
; * 176..215 and 288..359. If idx matches no entry these ranges of r are
; * written as zero.
; *
; * idx is treated as a 32-bit value: both the vector mask and the scalar
; * mask compare only the low 32 bits of r8, because the upper half of a
; * register carrying a 32-bit argument is undefined in the Microsoft x64
; * calling convention.
; *
; * r      Point to copy into (rcx).
; * table  Table - start of the entries to access (rdx).
; * idx    Index of point to retrieve (r8d).
; */
_text SEGMENT READONLY PARA
sp_521_get_point_33_9 PROC
        push    r12
        push    r13
        push    r14
        ; xmm6-xmm15 are callee-saved on Win64: spill them to the stack.
        sub     rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15

        ; ---- Pass 1: bytes 0..71 and 144..175 of the selected entry ----
        ; r14   = scalar entry counter (starts at 1)
        ; xmm13 = idx broadcast to four dwords
        ; xmm15 = 1 broadcast; xmm14 = vector entry counter (starts at 1)
        ; rax   = entries remaining (32)
        mov     r14, 1
        mov     rax, 1
        movd    xmm13, r8d
        add     rdx, 440                        ; skip table entry 0
        movd    xmm15, eax
        mov     rax, 32
        pshufd  xmm15, xmm15, 0
        pshufd  xmm13, xmm13, 0
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        xor     r12, r12
        movdqa  xmm14, xmm15
L_521_get_point_33_9_start_1:
        ; xmm12 = all ones if this entry's index equals idx, else zero.
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        ; r9 = the same mask for the 64-bit scalar limb. Compare 32 bits
        ; only so it always agrees with the vector mask above.
        xor     r9, r9
        cmp     r8d, r14d
        sete    r9b
        neg     r9
        inc     r14
        movdqu  xmm6, [rdx]
        movdqu  xmm7, [rdx+16]
        movdqu  xmm8, [rdx+32]
        movdqu  xmm9, [rdx+48]
        mov     r10, QWORD PTR [rdx+64]
        movdqu  xmm10, [rdx+144]
        movdqu  xmm11, [rdx+160]
        add     rdx, 440
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        and     r10, r9
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        or      r12, r10
        dec     rax
        jnz     L_521_get_point_33_9_start_1
        movdqu  [rcx], xmm0
        movdqu  [rcx+16], xmm1
        movdqu  [rcx+32], xmm2
        movdqu  [rcx+48], xmm3
        mov     QWORD PTR [rcx+64], r12
        movdqu  [rcx+144], xmm4
        movdqu  [rcx+160], xmm5

        ; ---- Pass 2: bytes 176..215 and 288..359 of the selected entry ----
        mov     r14, 1
        mov     rax, 1
        movd    xmm13, r8d
        sub     rdx, 14080                      ; rewind 32 * 440 bytes to entry 1
        movd    xmm15, eax
        mov     rax, 32
        pshufd  xmm15, xmm15, 0
        pshufd  xmm13, xmm13, 0
        pxor    xmm0, xmm0
        pxor    xmm1, xmm1
        pxor    xmm2, xmm2
        pxor    xmm3, xmm3
        pxor    xmm4, xmm4
        pxor    xmm5, xmm5
        xor     r12, r12
        xor     r13, r13
        movdqa  xmm14, xmm15
L_521_get_point_33_9_start_2:
        movdqa  xmm12, xmm14
        paddd   xmm14, xmm15
        pcmpeqd xmm12, xmm13
        xor     r9, r9
        cmp     r8d, r14d                       ; 32-bit compare, see pass 1
        sete    r9b
        neg     r9
        inc     r14
        movdqu  xmm6, [rdx+176]
        movdqu  xmm7, [rdx+192]
        mov     r10, QWORD PTR [rdx+208]
        movdqu  xmm8, [rdx+288]
        movdqu  xmm9, [rdx+304]
        movdqu  xmm10, [rdx+320]
        movdqu  xmm11, [rdx+336]
        mov     r11, QWORD PTR [rdx+352]
        add     rdx, 440
        pand    xmm6, xmm12
        pand    xmm7, xmm12
        pand    xmm8, xmm12
        pand    xmm9, xmm12
        pand    xmm10, xmm12
        pand    xmm11, xmm12
        and     r10, r9
        and     r11, r9
        por     xmm0, xmm6
        por     xmm1, xmm7
        por     xmm2, xmm8
        por     xmm3, xmm9
        por     xmm4, xmm10
        por     xmm5, xmm11
        or      r12, r10
        or      r13, r11
        dec     rax
        jnz     L_521_get_point_33_9_start_2
        movdqu  [rcx+176], xmm0
        movdqu  [rcx+192], xmm1
        mov     QWORD PTR [rcx+208], r12
        movdqu  [rcx+288], xmm2
        movdqu  [rcx+304], xmm3
        movdqu  [rcx+320], xmm4
        movdqu  [rcx+336], xmm5
        mov     QWORD PTR [rcx+352], r13

        ; Restore the callee-saved vector registers and unwind.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_get_point_33_9 ENDP
_text ENDS
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Touch each possible point that could be being copied.
; *
; * AVX2 variant. Every one of the 32 candidate entries (indices 1..32,
; * 440 bytes apart, entry 0 is skipped) is read and ANDed with an
; * all-ones/all-zeros mask, so the sequence of memory accesses does not
; * depend on idx. The masked values are ORed together, which leaves only
; * the selected entry.
; *
; * Copied byte ranges of an entry: 0..71, 144..215 and 288..359
; * (presumably the nine-limb x, y and z ordinates - confirm against the
; * point structure). Each range is gathered as two 32-byte vectors plus
; * one 64-bit scalar. If idx matches no entry these ranges of r are
; * written as zero.
; *
; * idx is treated as a 32-bit value: both the vector mask and the scalar
; * mask compare only the low 32 bits of r8, because the upper half of a
; * register carrying a 32-bit argument is undefined in the Microsoft x64
; * calling convention.
; *
; * r      Point to copy into (rcx).
; * table  Table - start of the entries to access (rdx).
; * idx    Index of point to retrieve (r8d).
; */
_text SEGMENT READONLY PARA
sp_521_get_point_33_avx2_9 PROC
        push    r12
        push    r13
        push    r14
        push    r15
        push    rdi
        ; xmm6-xmm15 are callee-saved on Win64: spill them to the stack.
        sub     rsp, 160
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        vmovdqu OWORD PTR [rsp+96], xmm12
        vmovdqu OWORD PTR [rsp+112], xmm13
        vmovdqu OWORD PTR [rsp+128], xmm14
        vmovdqu OWORD PTR [rsp+144], xmm15

        ; rdi   = scalar entry counter (starts at 1)
        ; ymm13 = idx broadcast to eight dwords
        ; ymm15 = 1 broadcast; ymm14 = vector entry counter (starts at 1)
        ; rax   = entries remaining (32)
        mov     rdi, 1
        mov     rax, 1
        vmovd   xmm13, r8d                      ; VEX form: no legacy-SSE mixing
        add     rdx, 440                        ; skip table entry 0
        vmovd   xmm15, eax
        mov     rax, 32
        vpxor   ymm14, ymm14, ymm14             ; all-zero indices => broadcast lane 0
        vpermd  ymm13, ymm14, ymm13
        vpermd  ymm15, ymm14, ymm15
        vpxor   ymm0, ymm0, ymm0
        vpxor   ymm1, ymm1, ymm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   ymm3, ymm3, ymm3
        vpxor   ymm4, ymm4, ymm4
        vpxor   ymm5, ymm5, ymm5
        xor     r10, r10
        xor     r11, r11
        xor     r12, r12
        vmovdqa ymm14, ymm15
L_521_get_point_33_avx2_9_start:
        ; ymm12 = all ones if this entry's index equals idx, else zero.
        vpcmpeqd ymm12, ymm14, ymm13
        vpaddd  ymm14, ymm14, ymm15
        ; r9 = the same mask for the 64-bit scalar limbs. Compare 32 bits
        ; only so it always agrees with the vector mask above.
        xor     r9, r9
        cmp     r8d, edi
        sete    r9b
        neg     r9
        inc     rdi
        vmovupd ymm6, YMMWORD PTR [rdx]
        vmovupd ymm7, YMMWORD PTR [rdx+32]
        vmovupd ymm8, YMMWORD PTR [rdx+144]
        vmovupd ymm9, YMMWORD PTR [rdx+176]
        vmovupd ymm10, YMMWORD PTR [rdx+288]
        vmovupd ymm11, YMMWORD PTR [rdx+320]
        mov     r13, QWORD PTR [rdx+64]
        mov     r14, QWORD PTR [rdx+208]
        mov     r15, QWORD PTR [rdx+352]
        add     rdx, 440
        vpand   ymm6, ymm6, ymm12
        vpand   ymm7, ymm7, ymm12
        vpand   ymm8, ymm8, ymm12
        vpand   ymm9, ymm9, ymm12
        vpand   ymm10, ymm10, ymm12
        vpand   ymm11, ymm11, ymm12
        and     r13, r9
        and     r14, r9
        and     r15, r9
        vpor    ymm0, ymm0, ymm6
        vpor    ymm1, ymm1, ymm7
        vpor    ymm2, ymm2, ymm8
        vpor    ymm3, ymm3, ymm9
        vpor    ymm4, ymm4, ymm10
        vpor    ymm5, ymm5, ymm11
        or      r10, r13
        or      r11, r14
        or      r12, r15
        dec     rax
        jnz     L_521_get_point_33_avx2_9_start

        ; Write the selected point into r.
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+32], ymm1
        vmovupd YMMWORD PTR [rcx+144], ymm2
        vmovupd YMMWORD PTR [rcx+176], ymm3
        vmovupd YMMWORD PTR [rcx+288], ymm4
        vmovupd YMMWORD PTR [rcx+320], ymm5
        mov     QWORD PTR [rcx+64], r10
        mov     QWORD PTR [rcx+208], r11
        mov     QWORD PTR [rcx+352], r12

        ; Clear the upper YMM halves so SSE code in the caller does not
        ; pay an AVX->SSE transition penalty.
        vzeroupper

        ; Restore the callee-saved vector registers and unwind.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        vmovdqu xmm12, OWORD PTR [rsp+96]
        vmovdqu xmm13, OWORD PTR [rsp+112]
        vmovdqu xmm14, OWORD PTR [rsp+128]
        vmovdqu xmm15, OWORD PTR [rsp+144]
        add     rsp, 160
        pop     rdi
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_get_point_33_avx2_9 ENDP
_text ENDS
ENDIF
|
|
|
-ENDIF
|
|
|
IFDEF HAVE_INTEL_AVX2
; /* Multiply two Montgomery form numbers mod the modulus (prime).
; * (r = a * b mod m)
; *
; * Schoolbook 9x9-limb multiply using MULX with two independent carry
; * chains (ADCX for the low product words, ADOX for the high product
; * words), followed by a fold of the 18-limb product at bit 521. The
; * constants 9 and 511 hard-code a 521-bit modulus (presumably
; * 2^521 - 1; confirm against the caller). Neither m nor mp is read: r9
; * is reused as the pointer to a.
; *
; * Stack frame (144 bytes, RSP stays at its base for the whole routine):
; *   [rsp+0   .. rsp+71 ]  product limbs 0..8   (also [rbx+0  .. rbx+71 ])
; *   [rsp+72  .. rsp+143]  product limbs 9..17  (also [rbx+72 .. rbx+143])
; * Nothing is ever stored below RSP: the Microsoft x64 ABI has no red
; * zone and treats memory beyond RSP as volatile.
; *
; * Register roles during the multiply:
; *   r8  = r, r9 = a, rbp = b, rbx = frame base, rdx = current A[i]
; *   r15 = constant 0, r14 = carry word handed from one row to the next
; *   r10-r13 = rotating window of product limbs, rax/rcx = MULX lo/hi
; *
; * r   Result of multiplication (rcx).
; * a   First number to multiply in Montgomery form (rdx).
; * b   Second number to multiply in Montgomery form (r8).
; * m   Modulus (prime) (unused).
; * mp  Montgomery multiplier (unused).
; */
_text SEGMENT READONLY PARA
sp_521_mont_mul_avx2_9 PROC
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        mov     rbp, r8
        mov     r8, rcx
        mov     r9, rdx
        sub     rsp, 144
        mov     rbx, rsp
        ; r15 = 0; this also clears CF and OF for the ADCX/ADOX chains.
        xor     r15, r15

        ; ---- Row 0: A[0] * B[0..8] ----
        mov     rdx, QWORD PTR [r9]
        ; A[0] * B[0]
        mulx    r11, r10, QWORD PTR [rbp]
        ; A[0] * B[1]
        mulx    r12, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx], r10
        adcx    r11, rax
        ; A[0] * B[2]
        mulx    r13, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+8], r11
        adcx    r12, rax
        mov     QWORD PTR [rbx+16], r12
        ; A[0] * B[3]
        mulx    r10, rax, QWORD PTR [rbp+24]
        adcx    r13, rax
        ; A[0] * B[4]
        mulx    r11, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+24], r13
        adcx    r10, rax
        ; A[0] * B[5]
        mulx    r12, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+32], r10
        adcx    r11, rax
        mov     QWORD PTR [rbx+40], r11
        ; A[0] * B[6]
        mulx    r13, rax, QWORD PTR [rbp+48]
        adcx    r12, rax
        ; A[0] * B[7]
        mulx    r10, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+48], r12
        adcx    r13, rax
        ; A[0] * B[8]
        mulx    r11, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+56], r13
        adcx    r10, rax
        adcx    r11, r15
        mov     r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+64], r10
        mov     QWORD PTR [rbx+72], r11

        ; ---- Row 1: A[1] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+8]
        mov     r11, QWORD PTR [rbx+8]
        mov     r12, QWORD PTR [rbx+16]
        mov     r13, QWORD PTR [rbx+24]
        mov     r10, QWORD PTR [rbx+32]
        ; A[1] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r11, rax
        adox    r12, rcx
        ; A[1] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+8], r11
        adcx    r12, rax
        adox    r13, rcx
        ; A[1] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+16], r12
        adcx    r13, rax
        adox    r10, rcx
        mov     QWORD PTR [rbx+24], r13
        mov     r11, QWORD PTR [rbx+40]
        mov     r12, QWORD PTR [rbx+48]
        mov     r13, QWORD PTR [rbx+56]
        ; A[1] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r10, rax
        adox    r11, rcx
        ; A[1] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+32], r10
        adcx    r11, rax
        adox    r12, rcx
        ; A[1] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+40], r11
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [rbx+48], r12
        mov     r10, QWORD PTR [rbx+64]
        mov     r11, QWORD PTR [rbx+72]
        ; A[1] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r13, rax
        adox    r10, rcx
        ; A[1] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+56], r13
        adcx    r10, rax
        adox    r11, rcx
        ; A[1] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+64], r10
        mov     r12, r15
        adcx    r11, rax
        adox    r12, rcx
        adcx    r12, r14
        mov     r14, r15
        adox    r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+72], r11
        mov     QWORD PTR [rbx+80], r12

        ; ---- Row 2: A[2] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+16]
        mov     r12, QWORD PTR [rbx+16]
        mov     r13, QWORD PTR [rbx+24]
        mov     r10, QWORD PTR [rbx+32]
        mov     r11, QWORD PTR [rbx+40]
        ; A[2] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r12, rax
        adox    r13, rcx
        ; A[2] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+16], r12
        adcx    r13, rax
        adox    r10, rcx
        ; A[2] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+24], r13
        adcx    r10, rax
        adox    r11, rcx
        mov     QWORD PTR [rbx+32], r10
        mov     r12, QWORD PTR [rbx+48]
        mov     r13, QWORD PTR [rbx+56]
        mov     r10, QWORD PTR [rbx+64]
        ; A[2] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r11, rax
        adox    r12, rcx
        ; A[2] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+40], r11
        adcx    r12, rax
        adox    r13, rcx
        ; A[2] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+48], r12
        adcx    r13, rax
        adox    r10, rcx
        mov     QWORD PTR [rbx+56], r13
        mov     r11, QWORD PTR [rbx+72]
        mov     r12, QWORD PTR [rbx+80]
        ; A[2] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r10, rax
        adox    r11, rcx
        ; A[2] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+64], r10
        adcx    r11, rax
        adox    r12, rcx
        ; A[2] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+72], r11
        mov     r13, r15
        adcx    r12, rax
        adox    r13, rcx
        adcx    r13, r14
        mov     r14, r15
        adox    r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+80], r12
        mov     QWORD PTR [rbx+88], r13

        ; ---- Row 3: A[3] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+24]
        mov     r13, QWORD PTR [rbx+24]
        mov     r10, QWORD PTR [rbx+32]
        mov     r11, QWORD PTR [rbx+40]
        mov     r12, QWORD PTR [rbx+48]
        ; A[3] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r13, rax
        adox    r10, rcx
        ; A[3] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+24], r13
        adcx    r10, rax
        adox    r11, rcx
        ; A[3] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+32], r10
        adcx    r11, rax
        adox    r12, rcx
        mov     QWORD PTR [rbx+40], r11
        mov     r13, QWORD PTR [rbx+56]
        mov     r10, QWORD PTR [rbx+64]
        mov     r11, QWORD PTR [rbx+72]
        ; A[3] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r12, rax
        adox    r13, rcx
        ; A[3] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+48], r12
        adcx    r13, rax
        adox    r10, rcx
        ; A[3] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+56], r13
        adcx    r10, rax
        adox    r11, rcx
        mov     QWORD PTR [rbx+64], r10
        mov     r12, QWORD PTR [rbx+80]
        mov     r13, QWORD PTR [rbx+88]
        ; A[3] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r11, rax
        adox    r12, rcx
        ; A[3] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+72], r11
        adcx    r12, rax
        adox    r13, rcx
        ; A[3] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+80], r12
        mov     r10, r15
        adcx    r13, rax
        adox    r10, rcx
        adcx    r10, r14
        mov     r14, r15
        adox    r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+88], r13
        mov     QWORD PTR [rbx+96], r10

        ; ---- Row 4: A[4] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+32]
        mov     r10, QWORD PTR [rbx+32]
        mov     r11, QWORD PTR [rbx+40]
        mov     r12, QWORD PTR [rbx+48]
        mov     r13, QWORD PTR [rbx+56]
        ; A[4] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r10, rax
        adox    r11, rcx
        ; A[4] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+32], r10
        adcx    r11, rax
        adox    r12, rcx
        ; A[4] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+40], r11
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [rbx+48], r12
        mov     r10, QWORD PTR [rbx+64]
        mov     r11, QWORD PTR [rbx+72]
        mov     r12, QWORD PTR [rbx+80]
        ; A[4] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r13, rax
        adox    r10, rcx
        ; A[4] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+56], r13
        adcx    r10, rax
        adox    r11, rcx
        ; A[4] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+64], r10
        adcx    r11, rax
        adox    r12, rcx
        mov     QWORD PTR [rbx+72], r11
        mov     r13, QWORD PTR [rbx+88]
        mov     r10, QWORD PTR [rbx+96]
        ; A[4] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r12, rax
        adox    r13, rcx
        ; A[4] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+80], r12
        adcx    r13, rax
        adox    r10, rcx
        ; A[4] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+88], r13
        mov     r11, r15
        adcx    r10, rax
        adox    r11, rcx
        adcx    r11, r14
        mov     r14, r15
        adox    r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+96], r10
        mov     QWORD PTR [rbx+104], r11

        ; ---- Row 5: A[5] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+40]
        mov     r11, QWORD PTR [rbx+40]
        mov     r12, QWORD PTR [rbx+48]
        mov     r13, QWORD PTR [rbx+56]
        mov     r10, QWORD PTR [rbx+64]
        ; A[5] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r11, rax
        adox    r12, rcx
        ; A[5] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+40], r11
        adcx    r12, rax
        adox    r13, rcx
        ; A[5] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+48], r12
        adcx    r13, rax
        adox    r10, rcx
        mov     QWORD PTR [rbx+56], r13
        mov     r11, QWORD PTR [rbx+72]
        mov     r12, QWORD PTR [rbx+80]
        mov     r13, QWORD PTR [rbx+88]
        ; A[5] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r10, rax
        adox    r11, rcx
        ; A[5] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+64], r10
        adcx    r11, rax
        adox    r12, rcx
        ; A[5] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+72], r11
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [rbx+80], r12
        mov     r10, QWORD PTR [rbx+96]
        mov     r11, QWORD PTR [rbx+104]
        ; A[5] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r13, rax
        adox    r10, rcx
        ; A[5] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+88], r13
        adcx    r10, rax
        adox    r11, rcx
        ; A[5] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+96], r10
        mov     r12, r15
        adcx    r11, rax
        adox    r12, rcx
        adcx    r12, r14
        mov     r14, r15
        adox    r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+104], r11
        mov     QWORD PTR [rbx+112], r12

        ; ---- Row 6: A[6] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+48]
        mov     r12, QWORD PTR [rbx+48]
        mov     r13, QWORD PTR [rbx+56]
        mov     r10, QWORD PTR [rbx+64]
        mov     r11, QWORD PTR [rbx+72]
        ; A[6] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r12, rax
        adox    r13, rcx
        ; A[6] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+48], r12
        adcx    r13, rax
        adox    r10, rcx
        ; A[6] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+56], r13
        adcx    r10, rax
        adox    r11, rcx
        mov     QWORD PTR [rbx+64], r10
        mov     r12, QWORD PTR [rbx+80]
        mov     r13, QWORD PTR [rbx+88]
        mov     r10, QWORD PTR [rbx+96]
        ; A[6] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r11, rax
        adox    r12, rcx
        ; A[6] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+72], r11
        adcx    r12, rax
        adox    r13, rcx
        ; A[6] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+80], r12
        adcx    r13, rax
        adox    r10, rcx
        mov     QWORD PTR [rbx+88], r13
        mov     r11, QWORD PTR [rbx+104]
        mov     r12, QWORD PTR [rbx+112]
        ; A[6] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r10, rax
        adox    r11, rcx
        ; A[6] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+96], r10
        adcx    r11, rax
        adox    r12, rcx
        ; A[6] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+104], r11
        mov     r13, r15
        adcx    r12, rax
        adox    r13, rcx
        adcx    r13, r14
        mov     r14, r15
        adox    r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+112], r12
        mov     QWORD PTR [rbx+120], r13

        ; ---- Row 7: A[7] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+56]
        mov     r13, QWORD PTR [rbx+56]
        mov     r10, QWORD PTR [rbx+64]
        mov     r11, QWORD PTR [rbx+72]
        mov     r12, QWORD PTR [rbx+80]
        ; A[7] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r13, rax
        adox    r10, rcx
        ; A[7] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+56], r13
        adcx    r10, rax
        adox    r11, rcx
        ; A[7] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+64], r10
        adcx    r11, rax
        adox    r12, rcx
        mov     QWORD PTR [rbx+72], r11
        mov     r13, QWORD PTR [rbx+88]
        mov     r10, QWORD PTR [rbx+96]
        mov     r11, QWORD PTR [rbx+104]
        ; A[7] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r12, rax
        adox    r13, rcx
        ; A[7] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+80], r12
        adcx    r13, rax
        adox    r10, rcx
        ; A[7] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+88], r13
        adcx    r10, rax
        adox    r11, rcx
        mov     QWORD PTR [rbx+96], r10
        mov     r12, QWORD PTR [rbx+112]
        mov     r13, QWORD PTR [rbx+120]
        ; A[7] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r11, rax
        adox    r12, rcx
        ; A[7] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+104], r11
        adcx    r12, rax
        adox    r13, rcx
        ; A[7] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+112], r12
        mov     r10, r15
        adcx    r13, rax
        adox    r10, rcx
        adcx    r10, r14
        mov     r14, r15
        adox    r14, r15
        adcx    r14, r15
        mov     QWORD PTR [rbx+120], r13
        mov     QWORD PTR [rbx+128], r10

        ; ---- Row 8: A[8] * B[0..8] ----
        mov     rdx, QWORD PTR [r9+64]
        mov     r10, QWORD PTR [rbx+64]
        mov     r11, QWORD PTR [rbx+72]
        mov     r12, QWORD PTR [rbx+80]
        mov     r13, QWORD PTR [rbx+88]
        ; A[8] * B[0]
        mulx    rcx, rax, QWORD PTR [rbp]
        adcx    r10, rax
        adox    r11, rcx
        ; A[8] * B[1]
        mulx    rcx, rax, QWORD PTR [rbp+8]
        mov     QWORD PTR [rbx+64], r10
        adcx    r11, rax
        adox    r12, rcx
        ; A[8] * B[2]
        mulx    rcx, rax, QWORD PTR [rbp+16]
        mov     QWORD PTR [rbx+72], r11
        adcx    r12, rax
        adox    r13, rcx
        mov     QWORD PTR [rbx+80], r12
        mov     r10, QWORD PTR [rbx+96]
        mov     r11, QWORD PTR [rbx+104]
        mov     r12, QWORD PTR [rbx+112]
        ; A[8] * B[3]
        mulx    rcx, rax, QWORD PTR [rbp+24]
        adcx    r13, rax
        adox    r10, rcx
        ; A[8] * B[4]
        mulx    rcx, rax, QWORD PTR [rbp+32]
        mov     QWORD PTR [rbx+88], r13
        adcx    r10, rax
        adox    r11, rcx
        ; A[8] * B[5]
        mulx    rcx, rax, QWORD PTR [rbp+40]
        mov     QWORD PTR [rbx+96], r10
        adcx    r11, rax
        adox    r12, rcx
        mov     QWORD PTR [rbx+104], r11
        mov     r13, QWORD PTR [rbx+120]
        mov     r10, QWORD PTR [rbx+128]
        ; A[8] * B[6]
        mulx    rcx, rax, QWORD PTR [rbp+48]
        adcx    r12, rax
        adox    r13, rcx
        ; A[8] * B[7]
        mulx    rcx, rax, QWORD PTR [rbp+56]
        mov     QWORD PTR [rbx+112], r12
        adcx    r13, rax
        adox    r10, rcx
        ; A[8] * B[8]
        mulx    rcx, rax, QWORD PTR [rbp+64]
        mov     QWORD PTR [rbx+120], r13
        mov     r11, r15
        adcx    r10, rax
        adox    r11, rcx
        adcx    r11, r14
        mov     QWORD PTR [rbx+128], r10
        mov     QWORD PTR [rbx+136], r11

        ; ---- Reduction: fold the product at bit 521 ----
        ; Load product limbs 8..16 (limb 17 is not used by the fold).
        ; rbx is reused as a data register from here on, so address the
        ; frame through rsp, which still points at the frame base.
        mov     rax, QWORD PTR [rsp+64]
        mov     rcx, QWORD PTR [rsp+72]
        mov     r10, QWORD PTR [rsp+80]
        ; r15 = low 9 bits of limb 8 = top limb of the low 521-bit part.
        mov     r15, rax
        and     r15, 511
        mov     r11, QWORD PTR [rsp+88]
        mov     r12, QWORD PTR [rsp+96]
        mov     r13, QWORD PTR [rsp+104]
        mov     r14, QWORD PTR [rsp+112]
        mov     rbx, QWORD PTR [rsp+120]
        mov     rdx, QWORD PTR [rsp+128]
        ; high = product >> 521 (shift limbs 8..16 right by 9 bits).
        shrd    rax, rcx, 9
        shrd    rcx, r10, 9
        shrd    r10, r11, 9
        shrd    r11, r12, 9
        shrd    r12, r13, 9
        shrd    r13, r14, 9
        shrd    r14, rbx, 9
        shrd    rbx, rdx, 9
        shr     rdx, 9
        ; sum = high + low 521 bits of the product.
        add     rax, QWORD PTR [rsp]
        adc     rcx, QWORD PTR [rsp+8]
        adc     r10, QWORD PTR [rsp+16]
        adc     r11, QWORD PTR [rsp+24]
        adc     r12, QWORD PTR [rsp+32]
        adc     r13, QWORD PTR [rsp+40]
        adc     r14, QWORD PTR [rsp+48]
        adc     rbx, QWORD PTR [rsp+56]
        adc     r15, rdx
        ; Fold once more: bits 9 and above of the top limb go back into limb 0.
        mov     rdx, r15
        shr     r15, 9
        and     rdx, 511
        add     rax, r15
        adc     rcx, 0
        adc     r10, 0
        adc     r11, 0
        adc     r12, 0
        adc     r13, 0
        adc     r14, 0
        adc     rbx, 0
        adc     rdx, 0
        ; Store r[0..8].
        mov     QWORD PTR [r8], rax
        mov     QWORD PTR [r8+8], rcx
        mov     QWORD PTR [r8+16], r10
        mov     QWORD PTR [r8+24], r11
        mov     QWORD PTR [r8+32], r12
        mov     QWORD PTR [r8+40], r13
        mov     QWORD PTR [r8+48], r14
        mov     QWORD PTR [r8+56], rbx
        mov     QWORD PTR [r8+64], rdx
        add     rsp, 144
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        ret
sp_521_mont_mul_avx2_9 ENDP
_text ENDS
ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square the Montgomery form number mod the modulus (prime). (r = a * a mod m)
|
|
|
-; *
|
|
|
-; * r Result of squaring.
|
|
|
-; * a Number to square in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; * mp Montgomery multiplier.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_sqr_avx2_9 PROC ; Win64 args read here: rcx = r, rdx = a (r8 = m and r9 = mp are overwritten without being read)
|
|
|
- push rbp ; save every callee-saved register this routine overwrites
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov r8, rcx ; r8 = r (result pointer)
|
|
|
- mov r9, rdx ; r9 = a (operand pointer); rdx is needed as the implicit mulx multiplicand
|
|
|
- sub rsp, 144 ; reserve 144 bytes = 18 qwords for the double-width square
|
|
|
- mov rbp, rsp ; rbp -> product words 0..8
|
|
|
- add rsp, 72 ; rsp -> product words 9..17. NOTE(review): words 0..8 now sit below rsp; confirm this is acceptable on Win64, which has no red zone
|
|
|
- xor r12, r12 ; r12 = 0 for the whole routine; also clears CF and OF for the adcx/adox chains
|
|
|
- ; Diagonal 1
|
|
|
- ; Zero into %r9
|
|
|
- ; A[1] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx r11, r10, QWORD PTR [r9+8]
|
|
|
- mov QWORD PTR [rbp+8], r10 ; product word 1
|
|
|
- ; Zero into %r8
|
|
|
- ; A[2] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+16]
|
|
|
- adcx r11, rax ; low halves accumulate through the CF chain
|
|
|
- adox r10, r12 ; OF chain (zero added here)
|
|
|
- mov QWORD PTR [rbp+16], r11 ; product word 2
|
|
|
- ; No load %r12 - %r9
|
|
|
- ; A[3] x A[0]
|
|
|
- mulx r14, rax, QWORD PTR [r9+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r14, r12
|
|
|
- mov QWORD PTR [rbp+24], r10 ; product word 3
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[4] x A[0]
|
|
|
- mulx r15, rax, QWORD PTR [r9+32]
|
|
|
- adcx r14, rax ; product word 4 stays in r14
|
|
|
- adox r15, r12
|
|
|
- ; No store %r12 - %r9
|
|
|
- ; No load %r14 - %r9
|
|
|
- ; A[5] x A[0]
|
|
|
- mulx rdi, rax, QWORD PTR [r9+40]
|
|
|
- adcx r15, rax ; product word 5 stays in r15
|
|
|
- adox rdi, r12
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[6] x A[0]
|
|
|
- mulx rsi, rax, QWORD PTR [r9+48]
|
|
|
- adcx rdi, rax ; product word 6 stays in rdi
|
|
|
- adox rsi, r12
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No load %rbx - %r9
|
|
|
- ; A[7] x A[0]
|
|
|
- mulx rbx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rsi, rax ; product word 7 stays in rsi
|
|
|
- adox rbx, r12
|
|
|
- ; No store %r15 - %r8
|
|
|
- ; Zero into %r8
|
|
|
- ; A[8] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+64]
|
|
|
- adcx rbx, rax ; product word 8 stays in rbx
|
|
|
- adox r10, r12
|
|
|
- ; No store %rbx - %r9
|
|
|
- ; Zero into %r9
|
|
|
- ; A[8] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx r11, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [rsp], r10 ; product word 9
|
|
|
- ; Carry
|
|
|
- adcx r11, r12 ; fold the pending CF into the top word
|
|
|
- mov r13, r12 ; r13 = 0 (mov leaves the flags untouched)
|
|
|
- adcx r13, r12 ; r13 += CF
|
|
|
- adox r13, r12 ; r13 += OF; r13 is the carry handed to the next diagonal
|
|
|
- mov QWORD PTR [rsp+8], r11 ; product word 10
|
|
|
- ; Diagonal 2
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- ; No load %r12 - %r8
|
|
|
- ; A[2] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+16] ; rdx still holds A[1]
|
|
|
- adcx r11, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[3] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; No store %r12 - %r8
|
|
|
- ; No load %r14 - %r8
|
|
|
- ; A[4] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[5] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No load %rbx - %r8
|
|
|
- ; A[6] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r11, QWORD PTR [rsp]
|
|
|
- ; A[7] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov r10, QWORD PTR [rsp+8]
|
|
|
- ; A[7] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rsp], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[7] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx r11, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [rsp+8], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[7] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx r10, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [rsp+16], r11 ; product word 11
|
|
|
- ; Carry
|
|
|
- adcx r10, r13 ; add the carry saved from the previous diagonal plus CF
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12 ; r13 = new carry (CF + OF)
|
|
|
- mov QWORD PTR [rsp+24], r10 ; product word 12
|
|
|
- ; Diagonal 3
|
|
|
- ; No load %r14 - %r9
|
|
|
- ; A[3] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[4] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No load %rbx - %r9
|
|
|
- ; A[5] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r10, QWORD PTR [rsp]
|
|
|
- ; A[6] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov r11, QWORD PTR [rsp+8]
|
|
|
- ; A[6] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- ; A[6] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- ; A[6] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[8] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx r10, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[8] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx r11, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [rsp+32], r10 ; product word 13
|
|
|
- ; Carry
|
|
|
- adcx r11, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [rsp+40], r11 ; product word 14
|
|
|
- ; Diagonal 4
|
|
|
- ; No load %rbx - %r8
|
|
|
- ; A[4] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r11, QWORD PTR [rsp]
|
|
|
- ; A[5] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov r10, QWORD PTR [rsp+8]
|
|
|
- ; A[5] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rsp], r11
|
|
|
- mov r11, QWORD PTR [rsp+16]
|
|
|
- ; A[8] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rsp+8], r10
|
|
|
- mov r10, QWORD PTR [rsp+24]
|
|
|
- ; A[8] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rsp+16], r11
|
|
|
- mov r11, QWORD PTR [rsp+32]
|
|
|
- ; A[7] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rsp+24], r10
|
|
|
- mov r10, QWORD PTR [rsp+40]
|
|
|
- ; A[7] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rsp+32], r11
|
|
|
- ; Zero into %r9
|
|
|
- ; A[8] x A[6]
|
|
|
- mulx r11, rax, QWORD PTR [r9+64] ; rdx still holds A[6]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r12
|
|
|
- mov QWORD PTR [rsp+40], r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[8] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx r10, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r10, r12
|
|
|
- mov QWORD PTR [rsp+48], r11 ; product word 15
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r13, r12
|
|
|
- adcx r13, r12
|
|
|
- adox r13, r12
|
|
|
- mov QWORD PTR [rsp+56], r10 ; product word 16
|
|
|
- mov QWORD PTR [rsp+64], r13 ; product word 17
|
|
|
- ; Double and Add in A[i] x A[i]
|
|
|
- mov r11, QWORD PTR [rbp+8]
|
|
|
- ; A[0] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- mov QWORD PTR [rbp], rax ; product word 0 is the low half of A[0]^2
|
|
|
- adox r11, r11 ; OF chain doubles the cross-product words
|
|
|
- adcx r11, rcx ; CF chain adds the A[i]^2 halves
|
|
|
- mov QWORD PTR [rbp+8], r11
|
|
|
- mov r10, QWORD PTR [rbp+16]
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- ; A[1] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+16], r10
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- ; A[2] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r14, r14
|
|
|
- adox r15, r15
|
|
|
- adcx r14, rax
|
|
|
- adcx r15, rcx
|
|
|
- ; A[3] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox rdi, rdi
|
|
|
- adox rsi, rsi
|
|
|
- adcx rdi, rax
|
|
|
- adcx rsi, rcx
|
|
|
- mov r11, QWORD PTR [rsp]
|
|
|
- ; A[4] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox rbx, rbx
|
|
|
- adox r11, r11
|
|
|
- adcx rbx, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rsp], r11
|
|
|
- mov r10, QWORD PTR [rsp+8]
|
|
|
- mov r11, QWORD PTR [rsp+16]
|
|
|
- ; A[5] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rsp+8], r10
|
|
|
- mov QWORD PTR [rsp+16], r11
|
|
|
- mov r10, QWORD PTR [rsp+24]
|
|
|
- mov r11, QWORD PTR [rsp+32]
|
|
|
- ; A[6] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rsp+24], r10
|
|
|
- mov QWORD PTR [rsp+32], r11
|
|
|
- mov r10, QWORD PTR [rsp+40]
|
|
|
- mov r11, QWORD PTR [rsp+48]
|
|
|
- ; A[7] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rsp+40], r10
|
|
|
- mov QWORD PTR [rsp+48], r11
|
|
|
- mov r10, QWORD PTR [rsp+56]
|
|
|
- mov r11, QWORD PTR [rsp+64]
|
|
|
- ; A[8] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rsp+56], r10
|
|
|
- mov QWORD PTR [rsp+64], r11
|
|
|
- mov QWORD PTR [rsp+-40], r14 ; spill product words 4..8 (held in registers) to their scratch slots
|
|
|
- mov QWORD PTR [rsp+-32], r15
|
|
|
- mov QWORD PTR [rsp+-24], rdi
|
|
|
- mov QWORD PTR [rsp+-16], rsi
|
|
|
- mov QWORD PTR [rsp+-8], rbx
|
|
|
- mov r10, QWORD PTR [rsp+-8] ; r10 = product word 8; reduction by shift-and-add starts here
|
|
|
- mov r11, QWORD PTR [rsp]
|
|
|
- mov r14, QWORD PTR [rsp+8]
|
|
|
- mov rcx, r10
|
|
|
- and rcx, 511 ; rcx = low 9 bits of word 8, i.e. product bits 512..520
|
|
|
- mov r15, QWORD PTR [rsp+16]
|
|
|
- mov rdi, QWORD PTR [rsp+24]
|
|
|
- mov rsi, QWORD PTR [rsp+32]
|
|
|
- mov rbx, QWORD PTR [rsp+40]
|
|
|
- mov rdx, QWORD PTR [rsp+48]
|
|
|
- mov rax, QWORD PTR [rsp+56] ; product word 16 (word 17 is not used by the reduction)
|
|
|
- sub rsp, 72 ; rsp -> product word 0 again
|
|
|
- shrd r10, r11, 9 ; r10..rax = product words 8..16 shifted right by 9 = product >> 521
|
|
|
- shrd r11, r14, 9
|
|
|
- shrd r14, r15, 9
|
|
|
- shrd r15, rdi, 9
|
|
|
- shrd rdi, rsi, 9
|
|
|
- shrd rsi, rbx, 9
|
|
|
- shrd rbx, rdx, 9
|
|
|
- shrd rdx, rax, 9
|
|
|
- shr rax, 9
|
|
|
- add r10, QWORD PTR [rsp] ; add the low 521 bits: words 0..7 from scratch, top 9 bits from rcx
|
|
|
- adc r11, QWORD PTR [rsp+8]
|
|
|
- adc r14, QWORD PTR [rsp+16]
|
|
|
- adc r15, QWORD PTR [rsp+24]
|
|
|
- adc rdi, QWORD PTR [rsp+32]
|
|
|
- adc rsi, QWORD PTR [rsp+40]
|
|
|
- adc rbx, QWORD PTR [rsp+48]
|
|
|
- adc rdx, QWORD PTR [rsp+56]
|
|
|
- adc rcx, rax
|
|
|
- mov rax, rcx ; second fold: anything above bit 520 of the sum is added back at bit 0
|
|
|
- shr rcx, 9
|
|
|
- and rax, 511
|
|
|
- add r10, rcx
|
|
|
- adc r11, 0
|
|
|
- adc r14, 0
|
|
|
- adc r15, 0
|
|
|
- adc rdi, 0
|
|
|
- adc rsi, 0
|
|
|
- adc rbx, 0
|
|
|
- adc rdx, 0
|
|
|
- adc rax, 0
|
|
|
- mov QWORD PTR [r8], r10 ; store the nine result words to r
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov QWORD PTR [r8+16], r14
|
|
|
- mov QWORD PTR [r8+24], r15
|
|
|
- mov QWORD PTR [r8+32], rdi
|
|
|
- mov QWORD PTR [r8+40], rsi
|
|
|
- mov QWORD PTR [r8+48], rbx
|
|
|
- mov QWORD PTR [r8+56], rdx
|
|
|
- mov QWORD PTR [r8+64], rax
|
|
|
- add rsp, 144 ; release the scratch area
|
|
|
- pop rbx ; restore in reverse push order
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- ret
|
|
|
-sp_521_mont_sqr_avx2_9 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract and 0 when not subtracting.
|
|
|
-; *
|
|
|
-; * r A single precision number representing condition subtract result.
|
|
|
-; * a A single precision number to subtract from.
|
|
|
-; * b A single precision number to subtract.
|
|
|
-; * m Mask value to apply.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_cond_sub_avx2_9 PROC ; Win64 args: rcx = r, rdx = a, r8 = b, r9 = m; rax returns 0 or -1 (final borrow)
|
|
|
- push r12 ; r12 is callee-saved and used as a scratch register below
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [rdx]
|
|
|
- pext r12, r12, r9 ; pext as a select: yields b[0] when m is all ones and 0 when m is 0
|
|
|
- sub r10, r12 ; starts the borrow chain; only mov/pext/sbb follow, so CF survives between words
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx], r10 ; r[0]
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+8], r11 ; r[1]
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r12 ; r[2]
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+24], r10 ; r[3]
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+32], r11 ; r[4]
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+40], r12 ; r[5]
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+56]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+48], r10 ; r[6]
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+56], r11 ; r[7]
|
|
|
- sbb r12, r10
|
|
|
- mov QWORD PTR [rcx+64], r12 ; r[8]
|
|
|
- sbb rax, rax ; rax = 0 if the 9-word subtraction did not borrow, -1 if it did
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_cond_sub_avx2_9 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 521 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_reduce_order_avx2_9 PROC ; Win64 args: rcx = a (reduced in place), rdx = m, r8 = mp
|
|
|
- push r12 ; save every callee-saved register this routine overwrites
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov r9, rcx ; r9 = walking pointer into a
|
|
|
- mov r10, rdx ; r10 = m; rdx is needed as the implicit mulx multiplicand
|
|
|
- xor rbp, rbp ; rbp = carry passed from one reduction step to the next
|
|
|
- ; i = 8: the loop below handles words 0..7, two per pass; word 8 is handled after the loop
|
|
|
- mov r11, 8
|
|
|
- mov r14, QWORD PTR [r9] ; a[0..3] are kept in r14, r15, rdi, rsi and rotate down one slot per step
|
|
|
- mov r15, QWORD PTR [r9+8]
|
|
|
- mov rdi, QWORD PTR [r9+16]
|
|
|
- mov rsi, QWORD PTR [r9+24]
|
|
|
- add r9, 32 ; bias the pointer so [r9-32] is the word being eliminated
|
|
|
- xor rbp, rbp ; rbp is already zero here; this repeats the earlier clear
|
|
|
-L_521_mont_reduce_order_avx2_9_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r12, r14
|
|
|
- imul rdx, r8
|
|
|
- xor rbx, rbx ; rbx = 0; also clears CF and OF for the adcx/adox chains
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10]
|
|
|
- mov r14, r15
|
|
|
- adcx r12, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r9+-32], r12
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- adcx r13, rbp ; add the carry left by the previous step
|
|
|
- mov rbp, rbx ; rbp = 0 (mov leaves the flags untouched)
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- adox rbp, rbx ; rbp += OF
|
|
|
- adcx rbp, rbx ; rbp += CF; this is the carry for the next step
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r13, r14
|
|
|
- imul rdx, r8
|
|
|
- xor rbx, rbx
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10]
|
|
|
- mov r14, r15
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r9+-24], r13
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+8]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx rsi, rax
|
|
|
- adox r12, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r12, QWORD PTR [r9+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- adcx r12, rbp
|
|
|
- mov rbp, rbx
|
|
|
- mov QWORD PTR [r9+48], r12
|
|
|
- adox rbp, rbx
|
|
|
- adcx rbp, rbx
|
|
|
- ; a += 2
|
|
|
- add r9, 16
|
|
|
- ; i -= 2
|
|
|
- sub r11, 2
|
|
|
- jnz L_521_mont_reduce_order_avx2_9_loop
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r12, r14
|
|
|
- imul rdx, r8
|
|
|
- and rdx, 511 ; last word (index 8): only the low 9 bits of mu are used
|
|
|
- xor rbx, rbx
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10]
|
|
|
- mov r14, r15
|
|
|
- adcx r12, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r9+-32], r12
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- adcx r13, rbp
|
|
|
- mov rbp, rbx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- adox rbp, rbx
|
|
|
- ; a += 1
|
|
|
- add r9, 8
|
|
|
- mov QWORD PTR [r9+-32], r14 ; write back the four words still cached in registers (words 9..12 of a)
|
|
|
- mov QWORD PTR [r9+-24], r15
|
|
|
- mov QWORD PTR [r9+-16], rdi
|
|
|
- mov QWORD PTR [r9+-8], rsi
|
|
|
- sub r9, 32
|
|
|
- lea r8, QWORD PTR [r9+-8] ; r8 -> word 8 of a (mp is no longer needed)
|
|
|
- sub r9, 72 ; r9 -> word 0 of a
|
|
|
- mov r12, QWORD PTR [r8] ; copy words 8..17 down to words 0..9, shifted right by 9 bits (value from bit 521 upward)
|
|
|
- mov r14, QWORD PTR [r8+8]
|
|
|
- mov r15, QWORD PTR [r8+16]
|
|
|
- mov rdi, QWORD PTR [r8+24]
|
|
|
- mov r13, QWORD PTR [r8+32]
|
|
|
- shrd r12, r14, 9
|
|
|
- shrd r14, r15, 9
|
|
|
- shrd r15, rdi, 9
|
|
|
- shrd rdi, r13, 9
|
|
|
- mov QWORD PTR [r9], r12
|
|
|
- mov QWORD PTR [r9+8], r14
|
|
|
- mov QWORD PTR [r9+16], r15
|
|
|
- mov QWORD PTR [r9+24], rdi
|
|
|
- mov r14, QWORD PTR [r8+40]
|
|
|
- mov r15, QWORD PTR [r8+48]
|
|
|
- mov rdi, QWORD PTR [r8+56]
|
|
|
- mov r12, QWORD PTR [r8+64]
|
|
|
- shrd r13, r14, 9
|
|
|
- shrd r14, r15, 9
|
|
|
- shrd r15, rdi, 9
|
|
|
- shrd rdi, r12, 9
|
|
|
- mov QWORD PTR [r9+32], r13
|
|
|
- mov QWORD PTR [r9+40], r14
|
|
|
- mov QWORD PTR [r9+48], r15
|
|
|
- mov QWORD PTR [r9+56], rdi
|
|
|
- mov r14, QWORD PTR [r8+72] ; reads word 17, so a must provide at least 18 words
|
|
|
- shrd r12, r14, 9
|
|
|
- shr r14, 9
|
|
|
- mov QWORD PTR [r9+64], r12
|
|
|
- mov QWORD PTR [r9+72], r14
|
|
|
- mov rbp, QWORD PTR [r9+64]
|
|
|
- shr rbp, 9 ; bits of the shifted value at position 521 and above
|
|
|
- neg rbp ; rbp = mask for the conditional subtract; presumably a[8] >> 9 is 0 or 1 here so the mask is 0 or all ones - TODO confirm
|
|
|
- mov rcx, QWORD PTR [r10]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- pext rcx, rcx, rbp ; pext as a select: m[0] when rbp is all ones, 0 when rbp is 0
|
|
|
- sub rdx, rcx ; starts the borrow chain; only mov/pext/sbb follow, so CF survives between words
|
|
|
- mov rcx, QWORD PTR [r10+8]
|
|
|
- mov rax, QWORD PTR [r9+8]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+16]
|
|
|
- mov rcx, QWORD PTR [r9+16]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+8], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+16], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [r9+32]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+24], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+40]
|
|
|
- mov rcx, QWORD PTR [r9+40]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+32], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+40], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+56]
|
|
|
- mov rax, QWORD PTR [r9+56]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+48], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+64]
|
|
|
- mov rcx, QWORD PTR [r9+64]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+56], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov QWORD PTR [r9+64], rcx
|
|
|
- pop rbp ; restore in reverse push order
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_reduce_order_avx2_9 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
|
|
|
-; *
|
|
|
-; * r Result of division by 2.
|
|
|
-; * a Number to divide.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_mont_div2_avx2_9 PROC ; Win64 args: rcx = r, rdx = a; r8 (m) is overwritten below without being read
|
|
|
- push r12 ; save the callee-saved registers used as limb holders
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- mov rax, QWORD PTR [rdx] ; load the nine words of a into rax, r8..r15
|
|
|
- mov r8, QWORD PTR [rdx+8]
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- mov r13, QWORD PTR [rdx+48]
|
|
|
- mov r14, QWORD PTR [rdx+56]
|
|
|
- mov r15, QWORD PTR [rdx+64]
|
|
|
- mov rdi, rax
|
|
|
- and rdi, 1 ; rdi = low bit of a (1 when a is odd)
|
|
|
- sub rax, rdi ; a -= rdi, with the borrow propagated through all nine words
|
|
|
- sbb r8, 0
|
|
|
- sbb r9, 0
|
|
|
- sbb r10, 0
|
|
|
- sbb r11, 0
|
|
|
- sbb r12, 0
|
|
|
- sbb r13, 0
|
|
|
- sbb r14, 0
|
|
|
- sbb r15, 0
|
|
|
- shl rdi, 9 ; rdi = low bit moved to bit 9 of the top word, i.e. 2^521 overall
|
|
|
- add r15, rdi ; net effect: a + (2^521 - 1) when a is odd, a unchanged when even
|
|
|
- shrd rax, r8, 1 ; shift the whole nine-word value right by one bit
|
|
|
- shrd r8, r9, 1
|
|
|
- shrd r9, r10, 1
|
|
|
- shrd r10, r11, 1
|
|
|
- shrd r11, r12, 1
|
|
|
- shrd r12, r13, 1
|
|
|
- shrd r13, r14, 1
|
|
|
- shrd r14, r15, 1
|
|
|
- shr r15, 1
|
|
|
- mov QWORD PTR [rcx], rax ; store the nine result words to r
|
|
|
- mov QWORD PTR [rcx+8], r8
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- mov QWORD PTR [rcx+48], r13
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- mov QWORD PTR [rcx+64], r15
|
|
|
- pop rdi ; restore in reverse push order
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_mont_div2_avx2_9 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-; /* Copy the entry at index idx, touching each possible entry so the memory access pattern is independent of idx.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of entry to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_get_entry_64_9 PROC ; Win64 args: rcx = r, rdx = table, r8d = idx (only the low 32 bits of r8 are used)
|
|
|
- push r12 ; r12 is callee-saved and used as the scalar entry counter
|
|
|
- sub rsp, 160 ; room to save xmm6-xmm15, which are callee-saved on Win64
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- vmovdqu OWORD PTR [rsp+128], xmm14
|
|
|
- vmovdqu OWORD PTR [rsp+144], xmm15
|
|
|
- ; From entry 1
|
|
|
- mov r12, 1 ; r12 = number of the entry under rdx (scalar copy of the xmm14 counter)
|
|
|
- mov rax, 1
|
|
|
- movd xmm13, r8d
|
|
|
- add rdx, 144 ; skip entry 0; entries are 144 bytes apart
|
|
|
- movd xmm15, eax
|
|
|
- mov rax, 63 ; visit entries 1..63
|
|
|
- pshufd xmm15, xmm15, 0 ; xmm15 = 1 in every dword lane
|
|
|
- pshufd xmm13, xmm13, 0 ; xmm13 = idx in every dword lane
|
|
|
- pxor xmm14, xmm14
|
|
|
- pxor xmm0, xmm0 ; xmm0-xmm3 and r11 accumulate the selected 72-byte field; they stay zero if no entry matches
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- xor r11, r11
|
|
|
- movdqa xmm14, xmm15 ; xmm14 = running entry number, starting at 1
|
|
|
-L_521_get_entry_64_9_start_0:
|
|
|
- movdqa xmm12, xmm14
|
|
|
- paddd xmm14, xmm15
|
|
|
- pcmpeqd xmm12, xmm13 ; xmm12 = all ones when this entry number equals idx, else zero
|
|
|
- xor r9, r9
|
|
|
- cmp r8d, r12d ; 32-bit compare so the scalar mask always agrees with the 32-bit pcmpeqd mask, whatever the upper half of r8 holds
|
|
|
- sete r9b
|
|
|
- neg r9 ; r9 = scalar mask for the ninth word (0 or all ones)
|
|
|
- inc r12
|
|
|
- movdqu xmm4, [rdx] ; every entry is read regardless of idx
|
|
|
- movdqu xmm5, [rdx+16]
|
|
|
- movdqu xmm6, [rdx+32]
|
|
|
- movdqu xmm7, [rdx+48]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- add rdx, 144
|
|
|
- pand xmm4, xmm12
|
|
|
- pand xmm5, xmm12
|
|
|
- pand xmm6, xmm12
|
|
|
- pand xmm7, xmm12
|
|
|
- and r10, r9
|
|
|
- por xmm0, xmm4
|
|
|
- por xmm1, xmm5
|
|
|
- por xmm2, xmm6
|
|
|
- por xmm3, xmm7
|
|
|
- or r11, r10
|
|
|
- dec rax
|
|
|
- jnz L_521_get_entry_64_9_start_0
|
|
|
- movdqu [rcx], xmm0 ; first 72-byte field goes to r at offset 0
|
|
|
- movdqu [rcx+16], xmm1
|
|
|
- movdqu [rcx+32], xmm2
|
|
|
- movdqu [rcx+48], xmm3
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- ; From entry 1
|
|
|
- mov r12, 1
|
|
|
- mov rax, 1
|
|
|
- movd xmm13, r8d
|
|
|
- sub rdx, 9000 ; rewind from table+9216 to table+216 = entry 1 at byte offset 72 (second field)
|
|
|
- movd xmm15, eax
|
|
|
- mov rax, 63
|
|
|
- pshufd xmm15, xmm15, 0
|
|
|
- pshufd xmm13, xmm13, 0
|
|
|
- pxor xmm14, xmm14
|
|
|
- pxor xmm0, xmm0
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- xor r11, r11
|
|
|
- movdqa xmm14, xmm15
|
|
|
-L_521_get_entry_64_9_start_1:
|
|
|
- movdqa xmm12, xmm14
|
|
|
- paddd xmm14, xmm15
|
|
|
- pcmpeqd xmm12, xmm13
|
|
|
- xor r9, r9
|
|
|
- cmp r8d, r12d ; 32-bit compare, matching the vector selector
|
|
|
- sete r9b
|
|
|
- neg r9
|
|
|
- inc r12
|
|
|
- movdqu xmm4, [rdx]
|
|
|
- movdqu xmm5, [rdx+16]
|
|
|
- movdqu xmm6, [rdx+32]
|
|
|
- movdqu xmm7, [rdx+48]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- add rdx, 144
|
|
|
- pand xmm4, xmm12
|
|
|
- pand xmm5, xmm12
|
|
|
- pand xmm6, xmm12
|
|
|
- pand xmm7, xmm12
|
|
|
- and r10, r9
|
|
|
- por xmm0, xmm4
|
|
|
- por xmm1, xmm5
|
|
|
- por xmm2, xmm6
|
|
|
- por xmm3, xmm7
|
|
|
- or r11, r10
|
|
|
- dec rax
|
|
|
- jnz L_521_get_entry_64_9_start_1
|
|
|
- movdqu [rcx+144], xmm0 ; second 72-byte field goes to r at offset 144
|
|
|
- movdqu [rcx+160], xmm1
|
|
|
- movdqu [rcx+176], xmm2
|
|
|
- movdqu [rcx+192], xmm3
|
|
|
- mov QWORD PTR [rcx+208], r11
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the callee-saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- vmovdqu xmm14, OWORD PTR [rsp+128]
|
|
|
- vmovdqu xmm15, OWORD PTR [rsp+144]
|
|
|
- add rsp, 160
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_get_entry_64_9 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Copy the entry at index idx, touching each possible entry so the memory access pattern is independent of idx.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of entry to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_get_entry_64_avx2_9 PROC ; Win64 args: rcx = r, rdx = table, r8d = idx (only the low 32 bits of r8 are used)
|
|
|
- push r12 ; r12-r14 are callee-saved and used as scratch below
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- sub rsp, 96 ; room to save xmm6-xmm11, which are callee-saved on Win64
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- mov r14, 1 ; r14 = number of the entry under rdx (scalar copy of the ymm10 counter)
|
|
|
- mov rax, 1
|
|
|
- movd xmm9, r8d
|
|
|
- add rdx, 144 ; skip entry 0; entries are 144 bytes apart
|
|
|
- movd xmm11, eax
|
|
|
- mov rax, 63 ; visit entries 1..63, like sp_521_get_entry_64_9; the previous count of 64 also read entry index 64
|
|
|
- vpxor ymm10, ymm10, ymm10
|
|
|
- vpermd ymm9, ymm10, ymm9 ; all-zero indices broadcast dword 0: ymm9 = idx in every lane
|
|
|
- vpermd ymm11, ymm10, ymm11 ; ymm11 = 1 in every lane
|
|
|
- vpxor ymm0, ymm0, ymm0 ; ymm0-ymm3, r10 and r11 accumulate the selected entry; they stay zero if no entry matches
|
|
|
- vpxor ymm1, ymm1, ymm1
|
|
|
- vpxor ymm2, ymm2, ymm2
|
|
|
- vpxor ymm3, ymm3, ymm3
|
|
|
- xor r10, r10
|
|
|
- xor r11, r11
|
|
|
- vmovdqa ymm10, ymm11 ; ymm10 = running entry number, starting at 1
|
|
|
-L_521_get_entry_64_avx2_9_start:
|
|
|
- vpcmpeqd ymm8, ymm10, ymm9 ; ymm8 = all ones when this entry number equals idx, else zero
|
|
|
- vpaddd ymm10, ymm10, ymm11
|
|
|
- xor r9, r9
|
|
|
- cmp r8d, r14d ; 32-bit compare so the scalar mask always agrees with the 32-bit vpcmpeqd mask, whatever the upper half of r8 holds
|
|
|
- sete r9b
|
|
|
- neg r9 ; r9 = scalar mask for the two ninth words (0 or all ones)
|
|
|
- inc r14
|
|
|
- vmovupd ymm4, YMMWORD PTR [rdx] ; first field: bytes 0..63 in ymm4/ymm5, ninth word at +64
|
|
|
- vmovupd ymm5, YMMWORD PTR [rdx+32]
|
|
|
- vmovupd ymm6, YMMWORD PTR [rdx+72] ; second field: bytes 72..135 in ymm6/ymm7, ninth word at +136
|
|
|
- vmovupd ymm7, YMMWORD PTR [rdx+104]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- mov r13, QWORD PTR [rdx+136]
|
|
|
- add rdx, 144
|
|
|
- vpand ymm4, ymm4, ymm8
|
|
|
- vpand ymm5, ymm5, ymm8
|
|
|
- vpand ymm6, ymm6, ymm8
|
|
|
- vpand ymm7, ymm7, ymm8
|
|
|
- and r12, r9
|
|
|
- and r13, r9
|
|
|
- vpor ymm0, ymm0, ymm4
|
|
|
- vpor ymm1, ymm1, ymm5
|
|
|
- vpor ymm2, ymm2, ymm6
|
|
|
- vpor ymm3, ymm3, ymm7
|
|
|
- or r10, r12
|
|
|
- or r11, r13
|
|
|
- dec rax
|
|
|
- jnz L_521_get_entry_64_avx2_9_start
|
|
|
- vmovupd YMMWORD PTR [rcx], ymm0 ; first field goes to r at offset 0
|
|
|
- vmovupd YMMWORD PTR [rcx+32], ymm1
|
|
|
- vmovupd YMMWORD PTR [rcx+144], ymm2 ; second field goes to r at offset 144
|
|
|
- vmovupd YMMWORD PTR [rcx+176], ymm3
|
|
|
- mov QWORD PTR [rcx+64], r10
|
|
|
- mov QWORD PTR [rcx+208], r11
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the callee-saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- add rsp, 96
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_get_entry_64_avx2_9 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-IFNDEF WC_NO_CACHE_RESISTANT
|
|
|
-; /* Copy the entry at index idx, touching each possible entry so the memory access pattern is independent of idx.
|
|
|
-; *
|
|
|
-; * r Point to copy into.
|
|
|
-; * table Table - start of the entries to access
|
|
|
-; * idx Index of entry to retrieve.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_521_get_entry_65_9 PROC ; Win64 args: rcx = r, rdx = table, r8d = idx (only the low 32 bits of r8 are used)
|
|
|
- push r12 ; r12 is callee-saved and used as the scalar entry counter
|
|
|
- sub rsp, 160 ; room to save xmm6-xmm15, which are callee-saved on Win64
|
|
|
- vmovdqu OWORD PTR [rsp], xmm6
|
|
|
- vmovdqu OWORD PTR [rsp+16], xmm7
|
|
|
- vmovdqu OWORD PTR [rsp+32], xmm8
|
|
|
- vmovdqu OWORD PTR [rsp+48], xmm9
|
|
|
- vmovdqu OWORD PTR [rsp+64], xmm10
|
|
|
- vmovdqu OWORD PTR [rsp+80], xmm11
|
|
|
- vmovdqu OWORD PTR [rsp+96], xmm12
|
|
|
- vmovdqu OWORD PTR [rsp+112], xmm13
|
|
|
- vmovdqu OWORD PTR [rsp+128], xmm14
|
|
|
- vmovdqu OWORD PTR [rsp+144], xmm15
|
|
|
- ; From entry 1
|
|
|
- mov r12, 1 ; r12 = number of the entry under rdx (scalar copy of the xmm14 counter)
|
|
|
- mov rax, 1
|
|
|
- movd xmm13, r8d
|
|
|
- add rdx, 144 ; skip entry 0; entries are 144 bytes apart
|
|
|
- movd xmm15, eax
|
|
|
- mov rax, 64 ; visit entries 1..64
|
|
|
- pshufd xmm15, xmm15, 0 ; xmm15 = 1 in every dword lane
|
|
|
- pshufd xmm13, xmm13, 0 ; xmm13 = idx in every dword lane
|
|
|
- pxor xmm14, xmm14
|
|
|
- pxor xmm0, xmm0 ; xmm0-xmm3 and r11 accumulate the selected 72-byte field; they stay zero if no entry matches
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- xor r11, r11
|
|
|
- movdqa xmm14, xmm15 ; xmm14 = running entry number, starting at 1
|
|
|
-L_521_get_entry_65_9_start_0:
|
|
|
- movdqa xmm12, xmm14
|
|
|
- paddd xmm14, xmm15
|
|
|
- pcmpeqd xmm12, xmm13 ; xmm12 = all ones when this entry number equals idx, else zero
|
|
|
- xor r9, r9
|
|
|
- cmp r8d, r12d ; 32-bit compare so the scalar mask always agrees with the 32-bit pcmpeqd mask, whatever the upper half of r8 holds
|
|
|
- sete r9b
|
|
|
- neg r9 ; r9 = scalar mask for the ninth word (0 or all ones)
|
|
|
- inc r12
|
|
|
- movdqu xmm4, [rdx] ; every entry is read regardless of idx
|
|
|
- movdqu xmm5, [rdx+16]
|
|
|
- movdqu xmm6, [rdx+32]
|
|
|
- movdqu xmm7, [rdx+48]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- add rdx, 144
|
|
|
- pand xmm4, xmm12
|
|
|
- pand xmm5, xmm12
|
|
|
- pand xmm6, xmm12
|
|
|
- pand xmm7, xmm12
|
|
|
- and r10, r9
|
|
|
- por xmm0, xmm4
|
|
|
- por xmm1, xmm5
|
|
|
- por xmm2, xmm6
|
|
|
- por xmm3, xmm7
|
|
|
- or r11, r10
|
|
|
- dec rax
|
|
|
- jnz L_521_get_entry_65_9_start_0
|
|
|
- movdqu [rcx], xmm0 ; first 72-byte field goes to r at offset 0
|
|
|
- movdqu [rcx+16], xmm1
|
|
|
- movdqu [rcx+32], xmm2
|
|
|
- movdqu [rcx+48], xmm3
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- ; From entry 1
|
|
|
- mov r12, 1
|
|
|
- mov rax, 1
|
|
|
- movd xmm13, r8d
|
|
|
- sub rdx, 9144 ; rewind from table+9360 to table+216 = entry 1 at byte offset 72 (second field)
|
|
|
- movd xmm15, eax
|
|
|
- mov rax, 64
|
|
|
- pshufd xmm15, xmm15, 0
|
|
|
- pshufd xmm13, xmm13, 0
|
|
|
- pxor xmm14, xmm14
|
|
|
- pxor xmm0, xmm0
|
|
|
- pxor xmm1, xmm1
|
|
|
- pxor xmm2, xmm2
|
|
|
- pxor xmm3, xmm3
|
|
|
- xor r11, r11
|
|
|
- movdqa xmm14, xmm15
|
|
|
-L_521_get_entry_65_9_start_1:
|
|
|
- movdqa xmm12, xmm14
|
|
|
- paddd xmm14, xmm15
|
|
|
- pcmpeqd xmm12, xmm13
|
|
|
- xor r9, r9
|
|
|
- cmp r8d, r12d ; 32-bit compare, matching the vector selector
|
|
|
- sete r9b
|
|
|
- neg r9
|
|
|
- inc r12
|
|
|
- movdqu xmm4, [rdx]
|
|
|
- movdqu xmm5, [rdx+16]
|
|
|
- movdqu xmm6, [rdx+32]
|
|
|
- movdqu xmm7, [rdx+48]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- add rdx, 144
|
|
|
- pand xmm4, xmm12
|
|
|
- pand xmm5, xmm12
|
|
|
- pand xmm6, xmm12
|
|
|
- pand xmm7, xmm12
|
|
|
- and r10, r9
|
|
|
- por xmm0, xmm4
|
|
|
- por xmm1, xmm5
|
|
|
- por xmm2, xmm6
|
|
|
- por xmm3, xmm7
|
|
|
- or r11, r10
|
|
|
- dec rax
|
|
|
- jnz L_521_get_entry_65_9_start_1
|
|
|
- movdqu [rcx+144], xmm0 ; second 72-byte field goes to r at offset 144
|
|
|
- movdqu [rcx+160], xmm1
|
|
|
- movdqu [rcx+176], xmm2
|
|
|
- movdqu [rcx+192], xmm3
|
|
|
- mov QWORD PTR [rcx+208], r11
|
|
|
- vmovdqu xmm6, OWORD PTR [rsp] ; restore the callee-saved xmm registers
|
|
|
- vmovdqu xmm7, OWORD PTR [rsp+16]
|
|
|
- vmovdqu xmm8, OWORD PTR [rsp+32]
|
|
|
- vmovdqu xmm9, OWORD PTR [rsp+48]
|
|
|
- vmovdqu xmm10, OWORD PTR [rsp+64]
|
|
|
- vmovdqu xmm11, OWORD PTR [rsp+80]
|
|
|
- vmovdqu xmm12, OWORD PTR [rsp+96]
|
|
|
- vmovdqu xmm13, OWORD PTR [rsp+112]
|
|
|
- vmovdqu xmm14, OWORD PTR [rsp+128]
|
|
|
- vmovdqu xmm15, OWORD PTR [rsp+144]
|
|
|
- add rsp, 160
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_521_get_entry_65_9 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
; /* Touch each possible entry that could be being copied.
; *
; * AVX2 version. Entries 1 to 64 are all read and masked so that the
; * sequence of memory accesses is the same for every idx. When idx matches
; * none of them the words written to r are zero.
; * Each pass of the loop gathers the 9 words at offset 0 of the entry into r
; * at offset 0 and the 9 words at offset 72 of the entry into r at offset 144.
; *
; * r      Point to copy into.
; * table  Table - start of the entries to access
; * idx    Index of entry to retrieve. Only the low 32 bits are used.
; */
_text SEGMENT READONLY PARA
sp_521_get_entry_65_avx2_9 PROC
        push    r12
        push    r13
        push    r14
        ; Save the non-volatile XMM registers that are modified below.
        sub     rsp, 96
        vmovdqu OWORD PTR [rsp], xmm6
        vmovdqu OWORD PTR [rsp+16], xmm7
        vmovdqu OWORD PTR [rsp+32], xmm8
        vmovdqu OWORD PTR [rsp+48], xmm9
        vmovdqu OWORD PTR [rsp+64], xmm10
        vmovdqu OWORD PTR [rsp+80], xmm11
        ; From entry 1
        mov     r14, 1
        mov     rax, 1
        movd    xmm9, r8d
        add     rdx, 144
        movd    xmm11, eax
        ; Entries 1..64 only: one more iteration would read a whole entry
        ; beyond the end of a 65 entry table.
        mov     rax, 64
        ; Broadcast lane 0: ymm9 = idx in every lane, ymm11 = 1 in every lane
        vpxor   ymm10, ymm10, ymm10
        vpermd  ymm9, ymm10, ymm9
        vpermd  ymm11, ymm10, ymm11
        vpxor   ymm0, ymm0, ymm0
        vpxor   ymm1, ymm1, ymm1
        vpxor   ymm2, ymm2, ymm2
        vpxor   ymm3, ymm3, ymm3
        xor     r10, r10
        xor     r11, r11
        ; ymm10 = index of the entry being visited
        vmovdqa ymm10, ymm11
L_521_get_entry_65_avx2_9_start:
        ; ymm8 = all ones when this entry is the one requested
        vpcmpeqd        ymm8, ymm10, ymm9
        vpaddd  ymm10, ymm10, ymm11
        ; r9 = same mask for the ninth words - compare the same 32 bits of
        ; idx that the vector mask is built from
        xor     r9, r9
        cmp     r8d, r14d
        sete    r9b
        neg     r9
        inc     r14
        vmovupd ymm4, YMMWORD PTR [rdx]
        vmovupd ymm5, YMMWORD PTR [rdx+32]
        vmovupd ymm6, YMMWORD PTR [rdx+72]
        vmovupd ymm7, YMMWORD PTR [rdx+104]
        mov     r12, QWORD PTR [rdx+64]
        mov     r13, QWORD PTR [rdx+136]
        add     rdx, 144
        vpand   ymm4, ymm4, ymm8
        vpand   ymm5, ymm5, ymm8
        vpand   ymm6, ymm6, ymm8
        vpand   ymm7, ymm7, ymm8
        and     r12, r9
        and     r13, r9
        vpor    ymm0, ymm0, ymm4
        vpor    ymm1, ymm1, ymm5
        vpor    ymm2, ymm2, ymm6
        vpor    ymm3, ymm3, ymm7
        or      r10, r12
        or      r11, r13
        dec     rax
        jnz     L_521_get_entry_65_avx2_9_start
        vmovupd YMMWORD PTR [rcx], ymm0
        vmovupd YMMWORD PTR [rcx+32], ymm1
        vmovupd YMMWORD PTR [rcx+144], ymm2
        vmovupd YMMWORD PTR [rcx+176], ymm3
        mov     QWORD PTR [rcx+64], r10
        mov     QWORD PTR [rcx+208], r11
        ; Restore the non-volatile XMM registers.
        vmovdqu xmm6, OWORD PTR [rsp]
        vmovdqu xmm7, OWORD PTR [rsp+16]
        vmovdqu xmm8, OWORD PTR [rsp+32]
        vmovdqu xmm9, OWORD PTR [rsp+48]
        vmovdqu xmm10, OWORD PTR [rsp+64]
        vmovdqu xmm11, OWORD PTR [rsp+80]
        add     rsp, 96
        pop     r14
        pop     r13
        pop     r12
        ret
sp_521_get_entry_65_avx2_9 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
; /* Increment a in place. (a = a + 1)
; *
; * The carry is rippled through all nine 64-bit words. A carry out of the
; * top word is dropped.
; *
; * a  A single precision integer.
; */
_text SEGMENT READONLY PARA
sp_521_add_one_9 PROC
        ; mov leaves the flags alone, so the carry survives between words.
        mov     rax, QWORD PTR [rcx]
        add     rax, 1
        mov     QWORD PTR [rcx], rax
        mov     rax, QWORD PTR [rcx+8]
        adc     rax, 0
        mov     QWORD PTR [rcx+8], rax
        mov     rax, QWORD PTR [rcx+16]
        adc     rax, 0
        mov     QWORD PTR [rcx+16], rax
        mov     rax, QWORD PTR [rcx+24]
        adc     rax, 0
        mov     QWORD PTR [rcx+24], rax
        mov     rax, QWORD PTR [rcx+32]
        adc     rax, 0
        mov     QWORD PTR [rcx+32], rax
        mov     rax, QWORD PTR [rcx+40]
        adc     rax, 0
        mov     QWORD PTR [rcx+40], rax
        mov     rax, QWORD PTR [rcx+48]
        adc     rax, 0
        mov     QWORD PTR [rcx+48], rax
        mov     rax, QWORD PTR [rcx+56]
        adc     rax, 0
        mov     QWORD PTR [rcx+56], rax
        mov     rax, QWORD PTR [rcx+64]
        adc     rax, 0
        mov     QWORD PTR [rcx+64], rax
        ret
sp_521_add_one_9 ENDP
_text ENDS
|
|
|
; /* Read big endian unsigned byte array into r.
; * Uses the bswap instruction.
; *
; * Words are produced least significant first by walking the byte array
; * backwards from its end: 64 bytes at a time, then 8 bytes at a time, then
; * the remaining leading bytes (fewer than 8) are packed into one word.
; * Words of r after that, up to the ninth, are set to zero.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert (not read by this routine; the
; *       end of r is fixed at r + 65 bytes).
; * a     Byte array.
; * n     Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_521_from_bin_bswap PROC
        push    r12
        ; r11 = one past the last byte of a, r12 = limit for writing to r
        lea     r11, [r8+r9]
        lea     r12, [rcx+65]
        jmp     L_521_from_bin_bswap_64_end
L_521_from_bin_bswap_64_start:
        ; Convert 64 bytes into 8 words
        sub     r11, 64
        mov     r10, QWORD PTR [r11+48]
        mov     rax, QWORD PTR [r11+56]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx], rax
        mov     r10, QWORD PTR [r11+32]
        mov     rax, QWORD PTR [r11+40]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+16], rax
        mov     r10, QWORD PTR [r11+16]
        mov     rax, QWORD PTR [r11+24]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+40], r10
        mov     QWORD PTR [rcx+32], rax
        mov     r10, QWORD PTR [r11]
        mov     rax, QWORD PTR [r11+8]
        bswap   r10
        bswap   rax
        mov     QWORD PTR [rcx+56], r10
        mov     QWORD PTR [rcx+48], rax
        add     rcx, 64
        sub     r9, 64
L_521_from_bin_bswap_64_end:
        cmp     r9, 63
        jg      L_521_from_bin_bswap_64_start
        jmp     L_521_from_bin_bswap_8_end
L_521_from_bin_bswap_8_start:
        ; Convert 8 bytes into 1 word
        sub     r11, 8
        mov     rax, QWORD PTR [r11]
        bswap   rax
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_521_from_bin_bswap_8_end:
        cmp     r9, 7
        jg      L_521_from_bin_bswap_8_start
        ; Any leading bytes left over?
        test    r9, r9
        jz      L_521_from_bin_bswap_hi_end
        xor     r10d, r10d
        xor     eax, eax
L_521_from_bin_bswap_hi_start:
        ; Leading bytes are read forwards from the start of a
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_521_from_bin_bswap_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_521_from_bin_bswap_hi_end:
        cmp     rcx, r12
        jge     L_521_from_bin_bswap_zero_end
L_521_from_bin_bswap_zero_start:
        ; Zero the words not filled from a
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_521_from_bin_bswap_zero_start
L_521_from_bin_bswap_zero_end:
        pop     r12
        ret
sp_521_from_bin_bswap ENDP
_text ENDS
|
|
|
-IFNDEF NO_MOVBE_SUPPORT
|
|
|
; /* Read big endian unsigned byte array into r.
; * Uses the movbe instruction which is an optional instruction.
; *
; * Words are produced least significant first by walking the byte array
; * backwards from its end: 64 bytes at a time, then 8 bytes at a time, then
; * the remaining leading bytes (fewer than 8) are packed into one word.
; * Words of r after that, up to the ninth, are set to zero.
; *
; * r     A single precision integer.
; * size  Maximum number of bytes to convert (not read by this routine; the
; *       end of r is fixed at r + 66 bytes).
; * a     Byte array.
; * n     Number of bytes in array to read.
; */
_text SEGMENT READONLY PARA
sp_521_from_bin_movbe PROC
        push    r12
        ; r11 = one past the last byte of a, r12 = limit for writing to r
        lea     r11, [r8+r9]
        lea     r12, [rcx+66]
        jmp     L_521_from_bin_movbe_64_end
L_521_from_bin_movbe_64_start:
        ; Convert 64 bytes into 8 words
        sub     r11, 64
        movbe   r10, QWORD PTR [r11+48]
        movbe   rax, QWORD PTR [r11+56]
        mov     QWORD PTR [rcx+8], r10
        mov     QWORD PTR [rcx], rax
        movbe   r10, QWORD PTR [r11+32]
        movbe   rax, QWORD PTR [r11+40]
        mov     QWORD PTR [rcx+24], r10
        mov     QWORD PTR [rcx+16], rax
        movbe   r10, QWORD PTR [r11+16]
        movbe   rax, QWORD PTR [r11+24]
        mov     QWORD PTR [rcx+40], r10
        mov     QWORD PTR [rcx+32], rax
        movbe   r10, QWORD PTR [r11]
        movbe   rax, QWORD PTR [r11+8]
        mov     QWORD PTR [rcx+56], r10
        mov     QWORD PTR [rcx+48], rax
        add     rcx, 64
        sub     r9, 64
L_521_from_bin_movbe_64_end:
        cmp     r9, 63
        jg      L_521_from_bin_movbe_64_start
        jmp     L_521_from_bin_movbe_8_end
L_521_from_bin_movbe_8_start:
        ; Convert 8 bytes into 1 word
        sub     r11, 8
        movbe   rax, QWORD PTR [r11]
        mov     QWORD PTR [rcx], rax
        add     rcx, 8
        sub     r9, 8
L_521_from_bin_movbe_8_end:
        cmp     r9, 7
        jg      L_521_from_bin_movbe_8_start
        ; Any leading bytes left over?
        test    r9, r9
        jz      L_521_from_bin_movbe_hi_end
        xor     r10d, r10d
        xor     eax, eax
L_521_from_bin_movbe_hi_start:
        ; Leading bytes are read forwards from the start of a
        mov     al, BYTE PTR [r8]
        shl     r10, 8
        inc     r8
        add     r10, rax
        dec     r9
        jg      L_521_from_bin_movbe_hi_start
        mov     QWORD PTR [rcx], r10
        add     rcx, 8
L_521_from_bin_movbe_hi_end:
        cmp     rcx, r12
        jge     L_521_from_bin_movbe_zero_end
L_521_from_bin_movbe_zero_start:
        ; Zero the words not filled from a
        mov     QWORD PTR [rcx], 0
        add     rcx, 8
        cmp     rcx, r12
        jl      L_521_from_bin_movbe_zero_start
L_521_from_bin_movbe_zero_end:
        pop     r12
        ret
sp_521_from_bin_movbe ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 66
; * (the low two bytes of the ninth word followed by the eight full words).
; * Uses the bswap instruction.
; *
; * r  A single precision integer.
; * a  Byte array.
; */
_text SEGMENT READONLY PARA
sp_521_to_bin_bswap_9 PROC
        ; Top word: only bytes 65 and 64 of r are output, most significant
        ; first.
        mov     al, BYTE PTR [rcx+65]
        mov     r8b, BYTE PTR [rcx+64]
        mov     BYTE PTR [rdx], al
        mov     BYTE PTR [rdx+1], r8b
        ; Words 7..4
        mov     r8, QWORD PTR [rcx+56]
        mov     r9, QWORD PTR [rcx+48]
        mov     r10, QWORD PTR [rcx+40]
        mov     r11, QWORD PTR [rcx+32]
        bswap   r8
        bswap   r9
        bswap   r10
        bswap   r11
        mov     QWORD PTR [rdx+2], r8
        mov     QWORD PTR [rdx+10], r9
        mov     QWORD PTR [rdx+18], r10
        mov     QWORD PTR [rdx+26], r11
        ; Words 3..0
        mov     r8, QWORD PTR [rcx+24]
        mov     r9, QWORD PTR [rcx+16]
        mov     r10, QWORD PTR [rcx+8]
        mov     r11, QWORD PTR [rcx]
        bswap   r8
        bswap   r9
        bswap   r10
        bswap   r11
        mov     QWORD PTR [rdx+34], r8
        mov     QWORD PTR [rdx+42], r9
        mov     QWORD PTR [rdx+50], r10
        mov     QWORD PTR [rdx+58], r11
        ret
sp_521_to_bin_bswap_9 ENDP
_text ENDS
|
|
|
-IFNDEF NO_MOVBE_SUPPORT
|
|
|
; /* Write r as big endian to byte array.
; * Fixed length number of bytes written: 66
; * (the low two bytes of the ninth word followed by the eight full words).
; * Uses the movbe instruction which is optional.
; *
; * r  A single precision integer.
; * a  Byte array.
; */
_text SEGMENT READONLY PARA
sp_521_to_bin_movbe_9 PROC
        ; Top word: only bytes 65 and 64 of r are output, most significant
        ; first.
        mov     al, BYTE PTR [rcx+65]
        mov     r8b, BYTE PTR [rcx+64]
        mov     BYTE PTR [rdx], al
        mov     BYTE PTR [rdx+1], r8b
        ; Words 7..4
        movbe   r8, QWORD PTR [rcx+56]
        movbe   r9, QWORD PTR [rcx+48]
        movbe   r10, QWORD PTR [rcx+40]
        movbe   r11, QWORD PTR [rcx+32]
        mov     QWORD PTR [rdx+2], r8
        mov     QWORD PTR [rdx+10], r9
        mov     QWORD PTR [rdx+18], r10
        mov     QWORD PTR [rdx+26], r11
        ; Words 3..0
        movbe   r8, QWORD PTR [rcx+24]
        movbe   r9, QWORD PTR [rcx+16]
        movbe   r10, QWORD PTR [rcx+8]
        movbe   r11, QWORD PTR [rcx]
        mov     QWORD PTR [rdx+34], r8
        mov     QWORD PTR [rdx+42], r9
        mov     QWORD PTR [rdx+50], r10
        mov     QWORD PTR [rdx+58], r11
        ret
sp_521_to_bin_movbe_9 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
; /* Shift number right by n bits. (r = a >> n)
; *
; * The shift count is applied through cl, so the processor uses it modulo 64.
; *
; * r  Result of right shift by n.
; * a  Number to shift.
; * n  Amount to shift.
; */
_text SEGMENT READONLY PARA
sp_521_rshift_9 PROC
        push    r12
        ; rcx is needed for the shift count: keep the result pointer in rax
        ; BEFORE rcx is overwritten.
        mov     rax, rcx
        mov     rcx, r8
        mov     r8, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        ; Each word takes its new top bits from the next higher word
        shrd    r8, r9, cl
        shrd    r9, r10, cl
        shrd    r10, r11, cl
        shrd    r11, r12, cl
        mov     QWORD PTR [rax], r8
        mov     QWORD PTR [rax+8], r9
        mov     QWORD PTR [rax+16], r10
        mov     QWORD PTR [rax+24], r11
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rdx+48]
        mov     r11, QWORD PTR [rdx+56]
        mov     r8, QWORD PTR [rdx+64]
        shrd    r12, r9, cl
        shrd    r9, r10, cl
        shrd    r10, r11, cl
        shrd    r11, r8, cl
        mov     QWORD PTR [rax+32], r12
        mov     QWORD PTR [rax+40], r9
        mov     QWORD PTR [rax+48], r10
        mov     QWORD PTR [rax+56], r11
        ; Top word is filled with zeros from above
        shr     r8, cl
        mov     QWORD PTR [rax+64], r8
        pop     r12
        ret
sp_521_rshift_9 ENDP
_text ENDS
|
|
|
; /* Shift number left by n bit. (r = a << n)
; *
; * Ten words are written to r: the bits shifted out of the ninth word of a
; * are stored in the tenth word of r.
; * The shift count is applied through cl, so the processor uses it modulo 64.
; *
; * r  Result of left shift by n.
; * a  Number to shift.
; * n  Amount to shift.
; */
_text SEGMENT READONLY PARA
sp_521_lshift_9 PROC
        push    r12
        push    r13
        ; cl is needed for the shift count: keep the result pointer in rax
        ; BEFORE the low byte of rcx is overwritten.
        mov     rax, rcx
        mov     cl, r8b
        mov     r12, 0
        mov     r13, QWORD PTR [rdx+32]
        mov     r8, QWORD PTR [rdx+40]
        mov     r9, QWORD PTR [rdx+48]
        mov     r10, QWORD PTR [rdx+56]
        mov     r11, QWORD PTR [rdx+64]
        ; Work from the top word down: each word takes its new low bits from
        ; the next lower word
        shld    r12, r11, cl
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+40], r8
        mov     QWORD PTR [rax+48], r9
        mov     QWORD PTR [rax+56], r10
        mov     QWORD PTR [rax+64], r11
        mov     QWORD PTR [rax+72], r12
        mov     r11, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+8], r8
        mov     QWORD PTR [rax+16], r9
        mov     QWORD PTR [rax+24], r10
        mov     QWORD PTR [rax+32], r13
        ; Bottom word is filled with zeros from below
        shl     r11, cl
        mov     QWORD PTR [rax], r11
        pop     r13
        pop     r12
        ret
sp_521_lshift_9 ENDP
_text ENDS
|
|
|
; /* Shift number left by n bit. (r = a << n)
; *
; * Eighteen words are read from a and nineteen words are written to r: the
; * bits shifted out of the top word of a are stored in the extra word.
; * The shift count is applied through cl, so the processor uses it modulo 64.
; *
; * r  Result of left shift by n.
; * a  Number to shift.
; * n  Amount to shift.
; */
_text SEGMENT READONLY PARA
sp_521_lshift_18 PROC
        push    r12
        push    r13
        ; cl is needed for the shift count: keep the result pointer in rax
        ; BEFORE the low byte of rcx is overwritten.
        mov     rax, rcx
        mov     cl, r8b
        mov     r12, 0
        ; Words 17..13 of a -> words 18..14 of r
        mov     r13, QWORD PTR [rdx+104]
        mov     r8, QWORD PTR [rdx+112]
        mov     r9, QWORD PTR [rdx+120]
        mov     r10, QWORD PTR [rdx+128]
        mov     r11, QWORD PTR [rdx+136]
        shld    r12, r11, cl
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+112], r8
        mov     QWORD PTR [rax+120], r9
        mov     QWORD PTR [rax+128], r10
        mov     QWORD PTR [rax+136], r11
        mov     QWORD PTR [rax+144], r12
        ; Words 12..9 of a -> words 13..10 of r
        mov     r11, QWORD PTR [rdx+72]
        mov     r8, QWORD PTR [rdx+80]
        mov     r9, QWORD PTR [rdx+88]
        mov     r10, QWORD PTR [rdx+96]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+80], r8
        mov     QWORD PTR [rax+88], r9
        mov     QWORD PTR [rax+96], r10
        mov     QWORD PTR [rax+104], r13
        ; Words 8..5 of a -> words 9..6 of r
        mov     r13, QWORD PTR [rdx+40]
        mov     r8, QWORD PTR [rdx+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     r10, QWORD PTR [rdx+64]
        shld    r11, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r13, cl
        mov     QWORD PTR [rax+48], r8
        mov     QWORD PTR [rax+56], r9
        mov     QWORD PTR [rax+64], r10
        mov     QWORD PTR [rax+72], r11
        ; Words 4..1 of a -> words 5..2 of r
        mov     r11, QWORD PTR [rdx+8]
        mov     r8, QWORD PTR [rdx+16]
        mov     r9, QWORD PTR [rdx+24]
        mov     r10, QWORD PTR [rdx+32]
        shld    r13, r10, cl
        shld    r10, r9, cl
        shld    r9, r8, cl
        shld    r8, r11, cl
        mov     QWORD PTR [rax+16], r8
        mov     QWORD PTR [rax+24], r9
        mov     QWORD PTR [rax+32], r10
        mov     QWORD PTR [rax+40], r13
        ; Word 0 of a -> words 1..0 of r
        mov     r10, QWORD PTR [rdx]
        shld    r11, r10, cl
        shl     r10, cl
        mov     QWORD PTR [rax], r10
        mov     QWORD PTR [rax+8], r11
        pop     r13
        pop     r12
        ret
sp_521_lshift_18 ENDP
_text ENDS
|
|
|
; /* Subtract b from a, storing the result in a. (a -= b)
; *
; * a  A single precision integer and result.
; * b  A single precision integer.
; * returns 0 when there is no borrow out of the top word and all ones
; * (-1) when there is.
; */
_text SEGMENT READONLY PARA
sp_521_sub_in_place_9 PROC
        ; mov leaves the flags alone, so the borrow survives between words.
        mov     rax, QWORD PTR [rdx]
        sub     QWORD PTR [rcx], rax
        mov     rax, QWORD PTR [rdx+8]
        sbb     QWORD PTR [rcx+8], rax
        mov     rax, QWORD PTR [rdx+16]
        sbb     QWORD PTR [rcx+16], rax
        mov     rax, QWORD PTR [rdx+24]
        sbb     QWORD PTR [rcx+24], rax
        mov     rax, QWORD PTR [rdx+32]
        sbb     QWORD PTR [rcx+32], rax
        mov     rax, QWORD PTR [rdx+40]
        sbb     QWORD PTR [rcx+40], rax
        mov     rax, QWORD PTR [rdx+48]
        sbb     QWORD PTR [rcx+48], rax
        mov     rax, QWORD PTR [rdx+56]
        sbb     QWORD PTR [rcx+56], rax
        mov     rax, QWORD PTR [rdx+64]
        sbb     QWORD PTR [rcx+64], rax
        ; Turn the final borrow into a 0 / -1 mask
        sbb     rax, rax
        ret
sp_521_sub_in_place_9 ENDP
_text ENDS
|
|
|
; /* Multiply a by the single digit b and store in r. (r = a * b)
; *
; * Ten words are written to r.
; *
; * r  A single precision integer.
; * a  A single precision integer.
; * b  A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_521_mul_d_9 PROC
        ; mul writes rdx, so address a through r9.
        mov     r9, rdx
        ; A[0] * B
        mov     rax, r8
        mul     QWORD PTR [r9]
        mov     QWORD PTR [rcx], rax
        ; r10 = word carried into the next column
        mov     r10, rdx
        ; A[1] * B
        mov     rax, r8
        mul     QWORD PTR [r9+8]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+8], rax
        mov     r10, rdx
        ; A[2] * B
        mov     rax, r8
        mul     QWORD PTR [r9+16]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+16], rax
        mov     r10, rdx
        ; A[3] * B
        mov     rax, r8
        mul     QWORD PTR [r9+24]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+24], rax
        mov     r10, rdx
        ; A[4] * B
        mov     rax, r8
        mul     QWORD PTR [r9+32]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+32], rax
        mov     r10, rdx
        ; A[5] * B
        mov     rax, r8
        mul     QWORD PTR [r9+40]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+40], rax
        mov     r10, rdx
        ; A[6] * B
        mov     rax, r8
        mul     QWORD PTR [r9+48]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+48], rax
        mov     r10, rdx
        ; A[7] * B
        mov     rax, r8
        mul     QWORD PTR [r9+56]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+56], rax
        mov     r10, rdx
        ; A[8] * B
        mov     rax, r8
        mul     QWORD PTR [r9+64]
        add     rax, r10
        adc     rdx, 0
        mov     QWORD PTR [rcx+64], rax
        mov     QWORD PTR [rcx+72], rdx
        ret
sp_521_mul_d_9 ENDP
_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
; /* Multiply a by the single digit b and store in r. (r = a * b)
; * Uses the mulx instruction.
; *
; * Ten words are written to r.
; *
; * r  A single precision integer.
; * a  A single precision integer.
; * b  A single precision digit.
; */
_text SEGMENT READONLY PARA
sp_521_mul_d_avx2_9 PROC
        ; mulx takes one multiplicand from rdx: address a through rax and
        ; keep b in rdx.
        mov     rax, rdx
        mov     rdx, r8
        ; mulx and mov leave the flags alone, so one carry chain runs through
        ; every column. The high word alternates between r10 and r11.
        ; A[0] * B
        mulx    r10, r9, QWORD PTR [rax]
        mov     QWORD PTR [rcx], r9
        ; A[1] * B
        mulx    r11, r9, QWORD PTR [rax+8]
        add     r9, r10
        mov     QWORD PTR [rcx+8], r9
        ; A[2] * B
        mulx    r10, r9, QWORD PTR [rax+16]
        adc     r9, r11
        mov     QWORD PTR [rcx+16], r9
        ; A[3] * B
        mulx    r11, r9, QWORD PTR [rax+24]
        adc     r9, r10
        mov     QWORD PTR [rcx+24], r9
        ; A[4] * B
        mulx    r10, r9, QWORD PTR [rax+32]
        adc     r9, r11
        mov     QWORD PTR [rcx+32], r9
        ; A[5] * B
        mulx    r11, r9, QWORD PTR [rax+40]
        adc     r9, r10
        mov     QWORD PTR [rcx+40], r9
        ; A[6] * B
        mulx    r10, r9, QWORD PTR [rax+48]
        adc     r9, r11
        mov     QWORD PTR [rcx+48], r9
        ; A[7] * B
        mulx    r11, r9, QWORD PTR [rax+56]
        adc     r9, r10
        mov     QWORD PTR [rcx+56], r9
        ; A[8] * B
        mulx    r10, r9, QWORD PTR [rax+64]
        adc     r9, r11
        mov     QWORD PTR [rcx+64], r9
        ; Top word: last high word plus the final carry
        adc     r10, 0
        mov     QWORD PTR [rcx+72], r10
        ret
sp_521_mul_d_avx2_9 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF _WIN64
|
|
|
; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
; *
; * A single div instruction is used, so the quotient must fit in 64 bits
; * (d1 < div); otherwise the processor raises a divide error.
; *
; * d1   The high order half of the number to divide.
; * d0   The low order half of the number to divide.
; * div  The divisor.
; * returns the result of the division.
; */
_text SEGMENT READONLY PARA
div_521_word_asm_9 PROC
        ; div expects the number to divide in rdx:rax
        mov     rax, rdx
        mov     rdx, rcx
        div     r8
        ret
div_521_word_asm_9 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
; /* Shift the nine word number right by one bit. (r = a >> 1)
; *
; * r  Result of right shift by 1.
; * a  Number to shift.
; */
_text SEGMENT READONLY PARA
sp_521_rshift1_9 PROC
        ; Words 0..4 of a
        mov     rax, QWORD PTR [rdx]
        mov     r8, QWORD PTR [rdx+8]
        mov     r9, QWORD PTR [rdx+16]
        mov     r10, QWORD PTR [rdx+24]
        mov     r11, QWORD PTR [rdx+32]
        ; Each word takes its new top bit from the next higher word
        shrd    rax, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r8
        mov     QWORD PTR [rcx+16], r9
        mov     QWORD PTR [rcx+24], r10
        ; Words 5..8 of a; word 4 is still held in r11
        mov     r8, QWORD PTR [rdx+40]
        mov     r9, QWORD PTR [rdx+48]
        mov     r10, QWORD PTR [rdx+56]
        mov     rax, QWORD PTR [rdx+64]
        shrd    r11, r8, 1
        shrd    r8, r9, 1
        shrd    r9, r10, 1
        shrd    r10, rax, 1
        ; Top word is filled with a zero bit from above
        shr     rax, 1
        mov     QWORD PTR [rcx+32], r11
        mov     QWORD PTR [rcx+40], r8
        mov     QWORD PTR [rcx+48], r9
        mov     QWORD PTR [rcx+56], r10
        mov     QWORD PTR [rcx+64], rax
        ret
sp_521_rshift1_9 ENDP
_text ENDS
|
|
|
; /* Divide the number by 2 mod the prime. (r = a / 2 % m)
; *
; * When a is odd, m is added first so that the value is even, then the
; * value is shifted right by one bit. A carry out of the ninth word of the
; * addition is not kept.
; * NOTE(review): presumably a < m so that a + m fits in nine words - confirm
; * against the callers.
; * r and a may be the same or different buffers.
; *
; * r  Result of division by 2.
; * a  Number to divide.
; * m  Modulus
; */
_text SEGMENT READONLY PARA
sp_521_div2_mod_9 PROC
        push    r12
        mov     rax, QWORD PTR [rdx]
        and     rax, 1
        je      L_521_mod_inv_9_div2_mod_no_add
        ; a is odd: r = a + m
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [r8]
        mov     r11, QWORD PTR [r8+8]
        add     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     rax, QWORD PTR [rdx+16]
        mov     r9, QWORD PTR [rdx+24]
        mov     r10, QWORD PTR [r8+16]
        mov     r11, QWORD PTR [r8+24]
        adc     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx+16], rax
        mov     QWORD PTR [rcx+24], r9
        mov     rax, QWORD PTR [rdx+32]
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [r8+32]
        mov     r11, QWORD PTR [r8+40]
        adc     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx+32], rax
        mov     QWORD PTR [rcx+40], r9
        mov     rax, QWORD PTR [rdx+48]
        mov     r9, QWORD PTR [rdx+56]
        mov     r10, QWORD PTR [r8+48]
        mov     r11, QWORD PTR [r8+56]
        adc     rax, r10
        adc     r9, r11
        mov     QWORD PTR [rcx+48], rax
        mov     QWORD PTR [rcx+56], r9
        mov     rax, QWORD PTR [rdx+64]
        mov     r10, QWORD PTR [r8+64]
        adc     rax, r10
        mov     QWORD PTR [rcx+64], rax
        ; The sum now lives in r: shift that, not the original a, so the
        ; result is also correct when r and a are different buffers.
        mov     rdx, rcx
L_521_mod_inv_9_div2_mod_no_add:
        ; r = (value at rdx) >> 1
        mov     rax, QWORD PTR [rdx]
        mov     r9, QWORD PTR [rdx+8]
        mov     r10, QWORD PTR [rdx+16]
        mov     r11, QWORD PTR [rdx+24]
        mov     r12, QWORD PTR [rdx+32]
        shrd    rax, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, r12, 1
        mov     QWORD PTR [rcx], rax
        mov     QWORD PTR [rcx+8], r9
        mov     QWORD PTR [rcx+16], r10
        mov     QWORD PTR [rcx+24], r11
        mov     r9, QWORD PTR [rdx+40]
        mov     r10, QWORD PTR [rdx+48]
        mov     r11, QWORD PTR [rdx+56]
        mov     rax, QWORD PTR [rdx+64]
        shrd    r12, r9, 1
        shrd    r9, r10, 1
        shrd    r10, r11, 1
        shrd    r11, rax, 1
        mov     QWORD PTR [rcx+32], r12
        mov     QWORD PTR [rcx+40], r9
        mov     QWORD PTR [rcx+48], r10
        mov     QWORD PTR [rcx+56], r11
        shr     rax, 1
        mov     QWORD PTR [rcx+64], rax
        pop     r12
        ret
sp_521_div2_mod_9 ENDP
_text ENDS
|
|
|
; /* Count the number of bits needed to represent a.
; *
; * The nine words are examined from the most significant down; the result
; * is the position of the highest set bit plus one.
; *
; * a  A single precision integer.
; * returns the number of bits, or 0 when every word is zero.
; */
_text SEGMENT READONLY PARA
sp_521_num_bits_9 PROC
        ; Result stays 0 if no word has a bit set
        xor     eax, eax
        mov     rdx, QWORD PTR [rcx+64]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_512
        bsr     rax, rdx
        add     rax, 513
        ret
L_521_num_bits_9_end_512:
        mov     rdx, QWORD PTR [rcx+56]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_448
        bsr     rax, rdx
        add     rax, 449
        ret
L_521_num_bits_9_end_448:
        mov     rdx, QWORD PTR [rcx+48]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_384
        bsr     rax, rdx
        add     rax, 385
        ret
L_521_num_bits_9_end_384:
        mov     rdx, QWORD PTR [rcx+40]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_320
        bsr     rax, rdx
        add     rax, 321
        ret
L_521_num_bits_9_end_320:
        mov     rdx, QWORD PTR [rcx+32]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_256
        bsr     rax, rdx
        add     rax, 257
        ret
L_521_num_bits_9_end_256:
        mov     rdx, QWORD PTR [rcx+24]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_192
        bsr     rax, rdx
        add     rax, 193
        ret
L_521_num_bits_9_end_192:
        mov     rdx, QWORD PTR [rcx+16]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_128
        bsr     rax, rdx
        add     rax, 129
        ret
L_521_num_bits_9_end_128:
        mov     rdx, QWORD PTR [rcx+8]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_64
        bsr     rax, rdx
        add     rax, 65
        ret
L_521_num_bits_9_end_64:
        mov     rdx, QWORD PTR [rcx]
        test    rdx, rdx
        jz      L_521_num_bits_9_end_0
        bsr     rax, rdx
        add     rax, 1
L_521_num_bits_9_end_0:
L_521_num_bits_9_done:
        ret
sp_521_num_bits_9 ENDP
_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF WOLFSSL_SP_1024
|
|
|
-; /* Multiply a and b into r. (r = a * b)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mul_16 PROC
|
|
|
- push r12
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 128
|
|
|
- ; A[0] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- mov QWORD PTR [rsp], rax
|
|
|
- mov r11, rdx
|
|
|
- ; A[0] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- ; A[0] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+16], r12
|
|
|
- ; A[0] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+24], r10
|
|
|
- ; A[0] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+32], r11
|
|
|
- ; A[0] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+40], r12
|
|
|
- ; A[0] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- ; A[0] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- ; A[0] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+64], r12
|
|
|
- ; A[0] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+72], r10
|
|
|
- ; A[0] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+80], r11
|
|
|
- ; A[0] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+88], r12
|
|
|
- ; A[0] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- ; A[0] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- ; A[0] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[2] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+112], r12
|
|
|
- ; A[0] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[1] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[2] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[3] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rsp+120], r10
|
|
|
- ; A[1] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[2] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[3] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[4] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+128], r11
|
|
|
- ; A[2] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[4] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[5] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+136], r12
|
|
|
- ; A[3] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[5] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[6] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+144], r10
|
|
|
- ; A[4] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[6] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[7] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+152], r11
|
|
|
- ; A[5] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[7] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[8] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+160], r12
|
|
|
- ; A[6] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[8] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[9] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+168], r10
|
|
|
- ; A[7] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[9] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[10] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+176], r11
|
|
|
- ; A[8] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[10] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[11] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+184], r12
|
|
|
- ; A[9] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[11] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[12] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+192], r10
|
|
|
- ; A[10] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+200], r11
|
|
|
- ; A[11] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[14] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+208], r12
|
|
|
- ; A[12] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- xor r12, r12
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[14] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[15] * B[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- mov QWORD PTR [rcx+216], r10
|
|
|
- ; A[13] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[15] * B[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r11, rax
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+224], r11
|
|
|
- ; A[14] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- xor r11, r11
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r12, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+232], r12
|
|
|
- ; A[15] * B[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- mov QWORD PTR [rcx+240], r10
|
|
|
- mov QWORD PTR [rcx+248], r11
|
|
|
- mov rax, QWORD PTR [rsp]
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r10, QWORD PTR [rsp+16]
|
|
|
- mov r11, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r10, QWORD PTR [rsp+48]
|
|
|
- mov r11, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov rdx, QWORD PTR [rsp+72]
|
|
|
- mov r10, QWORD PTR [rsp+80]
|
|
|
- mov r11, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], rdx
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rsp+96]
|
|
|
- mov rdx, QWORD PTR [rsp+104]
|
|
|
- mov r10, QWORD PTR [rsp+112]
|
|
|
- mov r11, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], rdx
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- add rsp, 128
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mul_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; * Reads 16 64-bit words from a and writes 32 64-bit words to r, one product column (all A[i] * A[j] with the same i + j) at a time.
|
|
|
-; * r A single precision integer. Passed in rcx.
|
|
|
-; * a A single precision integer. Passed in rdx.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_sqr_16 PROC
|
|
|
- push r12 ; r12, r13 and r14 are used as scratch below and restored before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- mov r8, rdx ; r8 = a, because rdx is overwritten by every mul
|
|
|
- sub rsp, 128 ; 128-byte stack buffer that receives result words 0..15
|
|
|
- ; A[0] * A[0]
|
|
|
- mov rax, QWORD PTR [r8]
|
|
|
- mul rax
|
|
|
- xor r11, r11
|
|
|
- mov QWORD PTR [rsp], rax
|
|
|
- mov r10, rdx
|
|
|
- ; A[0] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax ; same product added a second time: it stands for both A[0]*A[1] and A[1]*A[0]
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+8], r10
|
|
|
- ; A[0] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[1] * A[1]
|
|
|
- mov rax, QWORD PTR [r8+8]
|
|
|
- mul rax
|
|
|
- add r11, rax ; the square term is added once only
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rsp+16], r11
|
|
|
- ; A[0] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[1] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rsp+24], r9
|
|
|
- ; A[0] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[1] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[2] * A[2]
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- ; A[0] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14 ; from this column on, r14:r13:r12 first collects the cross products of the column
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12 ; double the summed cross products in one pass
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12 ; fold the column total into the running accumulator
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- ; A[0] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[3]
|
|
|
- mov rax, QWORD PTR [r8+24]
|
|
|
- mul rax
|
|
|
- add r12, r12 ; double the cross products first; the square in rdx:rax is then added once
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+48], r9
|
|
|
- ; A[0] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+56], r10
|
|
|
- ; A[0] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[4]
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+64], r11
|
|
|
- ; A[0] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+72], r9
|
|
|
- ; A[0] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[5]
|
|
|
- mov rax, QWORD PTR [r8+40]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- ; A[0] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- ; A[0] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[6]
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+96], r9
|
|
|
- ; A[0] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rsp+104], r10
|
|
|
- ; A[0] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[7]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rsp+112], r11
|
|
|
- ; A[0] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[1] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[2] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rsp+120], r9
|
|
|
- ; A[1] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+8]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[2] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[3] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[8]
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+128], r10 ; result words 16..31 are written directly to r
|
|
|
- ; A[2] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+16]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[3] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[4] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+136], r11
|
|
|
- ; A[3] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+24]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[4] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[5] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[9]
|
|
|
- mov rax, QWORD PTR [r8+72]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+144], r9
|
|
|
- ; A[4] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+32]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[5] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[6] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+152], r10
|
|
|
- ; A[5] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+40]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[6] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[7] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[10]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+160], r11
|
|
|
- ; A[6] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+48]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[7] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[8] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+168], r9
|
|
|
- ; A[7] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+56]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[8] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[9] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[11] * A[11]
|
|
|
- mov rax, QWORD PTR [r8+88]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+176], r10
|
|
|
- ; A[8] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+64]
|
|
|
- xor r10, r10
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[9] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[10] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[11] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r11, r12
|
|
|
- adc r9, r13
|
|
|
- adc r10, r14
|
|
|
- mov QWORD PTR [rcx+184], r11
|
|
|
- ; A[9] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+72]
|
|
|
- xor r11, r11
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[10] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[11] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[12] * A[12]
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mul rax
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r9, r12
|
|
|
- adc r10, r13
|
|
|
- adc r11, r14
|
|
|
- mov QWORD PTR [rcx+192], r9
|
|
|
- ; A[10] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+80]
|
|
|
- xor r9, r9
|
|
|
- xor r14, r14
|
|
|
- mov r12, rax
|
|
|
- mov r13, rdx
|
|
|
- ; A[11] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- ; A[12] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul QWORD PTR [r8+96]
|
|
|
- add r12, rax
|
|
|
- adc r13, rdx
|
|
|
- adc r14, 0
|
|
|
- add r12, r12
|
|
|
- adc r13, r13
|
|
|
- adc r14, r14
|
|
|
- add r10, r12
|
|
|
- adc r11, r13
|
|
|
- adc r9, r14
|
|
|
- mov QWORD PTR [rcx+200], r10
|
|
|
- ; A[11] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+88]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax ; the remaining short columns add each cross product twice directly instead of using r12:r13:r14
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[12] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+96]
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[13] * A[13]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- mul rax
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+208], r11
|
|
|
- ; A[12] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+96]
|
|
|
- xor r11, r11
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[13] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul QWORD PTR [r8+104]
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- mov QWORD PTR [rcx+216], r9
|
|
|
- ; A[13] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+104]
|
|
|
- xor r9, r9
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- ; A[14] * A[14]
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mul rax
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx
|
|
|
- adc r9, 0
|
|
|
- mov QWORD PTR [rcx+224], r10
|
|
|
- ; A[14] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul QWORD PTR [r8+112]
|
|
|
- xor r10, r10
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- add r11, rax
|
|
|
- adc r9, rdx
|
|
|
- adc r10, 0
|
|
|
- mov QWORD PTR [rcx+232], r11
|
|
|
- ; A[15] * A[15]
|
|
|
- mov rax, QWORD PTR [r8+120]
|
|
|
- mul rax
|
|
|
- add r9, rax
|
|
|
- adc r10, rdx
|
|
|
- mov QWORD PTR [rcx+240], r9
|
|
|
- mov QWORD PTR [rcx+248], r10
|
|
|
- mov rax, QWORD PTR [rsp] ; copy buffered result words 0..15 from the stack to r; NOTE(review): presumably deferred so that r may alias a - confirm
|
|
|
- mov rdx, QWORD PTR [rsp+8]
|
|
|
- mov r12, QWORD PTR [rsp+16]
|
|
|
- mov r13, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], rdx
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- mov QWORD PTR [rcx+24], r13
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- mov rdx, QWORD PTR [rsp+40]
|
|
|
- mov r12, QWORD PTR [rsp+48]
|
|
|
- mov r13, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], rdx
|
|
|
- mov QWORD PTR [rcx+48], r12
|
|
|
- mov QWORD PTR [rcx+56], r13
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- mov rdx, QWORD PTR [rsp+72]
|
|
|
- mov r12, QWORD PTR [rsp+80]
|
|
|
- mov r13, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], rdx
|
|
|
- mov QWORD PTR [rcx+80], r12
|
|
|
- mov QWORD PTR [rcx+88], r13
|
|
|
- mov rax, QWORD PTR [rsp+96]
|
|
|
- mov rdx, QWORD PTR [rsp+104]
|
|
|
- mov r12, QWORD PTR [rsp+112]
|
|
|
- mov r13, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], rdx
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- mov QWORD PTR [rcx+120], r13
|
|
|
- add rsp, 128 ; release the stack buffer
|
|
|
- pop r14 ; restore saved registers in reverse push order
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_sqr_16 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Multiply a and b into r using the MULX, ADCX and ADOX instructions. (r = a * b)
|
|
|
-; *
|
|
|
-; * r Result of multiplication.
|
|
|
-; * a First number to multiply.
|
|
|
-; * b Second number to multiply.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mul_avx2_16 PROC
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- mov rbp, r8
|
|
|
- mov r8, rcx
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 128
|
|
|
- cmp r9, r8
|
|
|
- mov rbx, rsp
|
|
|
- cmovne rbx, r8
|
|
|
- cmp rbp, r8
|
|
|
- cmove rbx, rsp
|
|
|
- add r8, 128
|
|
|
- xor rdi, rdi
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- ; A[0] * B[0]
|
|
|
- mulx r11, r10, QWORD PTR [rbp]
|
|
|
- ; A[0] * B[1]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx], r10
|
|
|
- adcx r11, rax
|
|
|
- ; A[0] * B[2]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- adcx r12, rax
|
|
|
- ; A[0] * B[3]
|
|
|
- mulx r14, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- ; A[0] * B[4]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- ; A[0] * B[5]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- ; A[0] * B[6]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- ; A[0] * B[7]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- ; A[0] * B[8]
|
|
|
- mulx r14, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- ; A[0] * B[9]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- ; A[0] * B[10]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- ; A[0] * B[11]
|
|
|
- mulx r12, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- ; A[0] * B[12]
|
|
|
- mulx r13, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- ; A[0] * B[13]
|
|
|
- mulx r14, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- ; A[0] * B[14]
|
|
|
- mulx r10, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- ; A[0] * B[15]
|
|
|
- mulx r11, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rdi
|
|
|
- mov r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mov r11, QWORD PTR [rbx+8]
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- ; A[1] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[1] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[1] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- ; A[1] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[1] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- ; A[1] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[1] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[1] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[1] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[1] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[1] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[1] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, rdi
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mov r12, QWORD PTR [rbx+16]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- ; A[2] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+16], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[2] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[2] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- ; A[2] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[2] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- ; A[2] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[2] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[2] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[2] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[2] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[2] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[2] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, rdi
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mov r13, QWORD PTR [rbx+24]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- ; A[3] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[3] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+24], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- ; A[3] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[3] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[3] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- ; A[3] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[3] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[3] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[3] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[3] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[3] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, rdi
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mov r14, QWORD PTR [rbx+32]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- ; A[4] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+32], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[4] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- ; A[4] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[4] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[4] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[4] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[4] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[4] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[4] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[4] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[4] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[4] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mov r10, QWORD PTR [rbx+40]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- ; A[5] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+40], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[5] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- ; A[5] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[5] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[5] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[5] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[5] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[5] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[5] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov r11, rdi
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mov r11, QWORD PTR [rbx+48]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- ; A[6] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+48], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[6] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[6] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- ; A[6] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[6] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[6] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[6] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[6] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[6] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, rdi
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mov r12, QWORD PTR [rbx+56]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- ; A[7] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+56], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[7] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- ; A[7] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[7] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[7] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[7] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[7] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- ; A[7] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[7] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r13, rdi
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mov r13, QWORD PTR [rbx+64]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- ; A[8] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[8] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+64], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[8] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[8] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[8] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[8] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[8] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[8] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[8] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- ; A[8] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[8] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[8] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r14, rdi
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mov r14, QWORD PTR [rbx+72]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- ; A[9] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+72], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[9] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[9] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[9] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[9] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[9] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[9] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[9] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- ; A[9] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[9] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[9] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[9] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov r10, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mov r10, QWORD PTR [rbx+80]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- ; A[10] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+80], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[10] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[10] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- ; A[10] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[10] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[10] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[10] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[10] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[10] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov r11, rdi
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mov r11, QWORD PTR [rbx+88]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- ; A[11] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+88], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[11] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[11] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[11] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[11] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- ; A[11] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- ; A[11] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[11] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov r12, rdi
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- adcx r12, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mov r12, QWORD PTR [rbx+96]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; A[12] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[12] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+96], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[12] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[12] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- ; A[12] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[12] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[12] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- ; A[12] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[12] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- ; A[12] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[12] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov r13, rdi
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- adcx r13, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mov r13, QWORD PTR [rbx+104]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[13] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[13] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+104], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[13] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[13] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[13] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[13] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[13] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[13] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[13] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[13] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- mov r13, QWORD PTR [r8+96]
|
|
|
- ; A[13] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[13] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- mov r14, rdi
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- adcx r14, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- mov QWORD PTR [r8+104], r14
|
|
|
- mov rdx, QWORD PTR [r9+112]
|
|
|
- mov r14, QWORD PTR [rbx+112]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- ; A[14] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+112], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- ; A[14] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[14] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- ; A[14] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[14] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[14] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- mov r13, QWORD PTR [r8+96]
|
|
|
- mov r14, QWORD PTR [r8+104]
|
|
|
- ; A[14] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[14] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[14] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- mov r10, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- adcx r10, r15
|
|
|
- mov r15, rdi
|
|
|
- adox r15, rdi
|
|
|
- adcx r15, rdi
|
|
|
- mov QWORD PTR [r8+104], r14
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov rdx, QWORD PTR [r9+120]
|
|
|
- mov r10, QWORD PTR [rbx+120]
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- mov r13, QWORD PTR [r8+16]
|
|
|
- mov r14, QWORD PTR [r8+24]
|
|
|
- ; A[15] * B[0]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] * B[1]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+8]
|
|
|
- mov QWORD PTR [rbx+120], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] * B[2]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+16]
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[15] * B[3]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+24]
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- mov QWORD PTR [r8+16], r13
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r13, QWORD PTR [r8+56]
|
|
|
- ; A[15] * B[4]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+32]
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] * B[5]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+40]
|
|
|
- mov QWORD PTR [r8+24], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] * B[6]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+48]
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] * B[7]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+56]
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r14, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- mov r12, QWORD PTR [r8+88]
|
|
|
- ; A[15] * B[8]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[15] * B[9]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+72]
|
|
|
- mov QWORD PTR [r8+56], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] * B[10]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+80]
|
|
|
- mov QWORD PTR [r8+64], r14
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] * B[11]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+88]
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- mov r13, QWORD PTR [r8+96]
|
|
|
- mov r14, QWORD PTR [r8+104]
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- ; A[15] * B[12]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- ; A[15] * B[13]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+104]
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- adcx r13, rax
|
|
|
- adox r14, rcx
|
|
|
- ; A[15] * B[14]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+112]
|
|
|
- mov QWORD PTR [r8+96], r13
|
|
|
- adcx r14, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] * B[15]
|
|
|
- mulx rcx, rax, QWORD PTR [rbp+120]
|
|
|
- mov QWORD PTR [r8+104], r14
|
|
|
- mov r11, rdi
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- adcx r11, r15
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov QWORD PTR [r8+120], r11
|
|
|
- sub r8, 128
|
|
|
- cmp r9, r8
|
|
|
- je L_start_1024_mul_avx2_16
|
|
|
- cmp rbp, r8
|
|
|
- jne L_end_1024_mul_avx2_16
|
|
|
-L_start_1024_mul_avx2_16:
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx]
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+32]
|
|
|
- vmovups OWORD PTR [r8+32], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+48]
|
|
|
- vmovups OWORD PTR [r8+48], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+64]
|
|
|
- vmovups OWORD PTR [r8+64], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+80]
|
|
|
- vmovups OWORD PTR [r8+80], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+96]
|
|
|
- vmovups OWORD PTR [r8+96], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbx+112]
|
|
|
- vmovups OWORD PTR [r8+112], xmm0
|
|
|
-L_end_1024_mul_avx2_16:
|
|
|
- add rsp, 128
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- ret
|
|
|
-sp_1024_mul_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Square a and put result in r. (r = a * a)
|
|
|
-; *
|
|
|
-; * r A single precision integer.
|
|
|
-; * a A single precision integer.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_sqr_avx2_16 PROC
|
|
|
- push rbp
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- mov r8, rcx
|
|
|
- mov r9, rdx
|
|
|
- sub rsp, 128
|
|
|
- cmp r9, r8
|
|
|
- mov rbp, rsp
|
|
|
- cmovne rbp, r8
|
|
|
- add r8, 128
|
|
|
- xor r13, r13
|
|
|
- ; Diagonal 1
|
|
|
- ; Zero into %r9
|
|
|
- ; Zero into %r10
|
|
|
- ; A[1] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx r11, r10, QWORD PTR [r9+8]
|
|
|
- ; A[2] x A[0]
|
|
|
- mulx r12, rax, QWORD PTR [r9+16]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [rbp+8], r10
|
|
|
- mov QWORD PTR [rbp+16], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[3] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- ; A[4] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r13
|
|
|
- mov QWORD PTR [rbp+24], r12
|
|
|
- mov QWORD PTR [rbp+32], r10
|
|
|
- ; Zero into %r10
|
|
|
- ; Zero into %r8
|
|
|
- ; A[5] x A[0]
|
|
|
- mulx r12, rax, QWORD PTR [r9+40]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- ; A[6] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [rbp+40], r11
|
|
|
- mov QWORD PTR [rbp+48], r12
|
|
|
- ; Zero into %r9
|
|
|
- ; Zero into %r10
|
|
|
- ; A[7] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r13
|
|
|
- ; A[8] x A[0]
|
|
|
- mulx r12, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [rbp+56], r10
|
|
|
- mov QWORD PTR [rbp+64], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[9] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- ; A[10] x A[0]
|
|
|
- mulx r11, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, r13
|
|
|
- mov QWORD PTR [rbp+72], r12
|
|
|
- mov QWORD PTR [rbp+80], r10
|
|
|
- ; No load %r13 - %r10
|
|
|
- ; A[11] x A[0]
|
|
|
- mulx r15, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r15, r13
|
|
|
- ; A[12] x A[0]
|
|
|
- mulx rdi, rax, QWORD PTR [r9+96]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, r13
|
|
|
- mov QWORD PTR [rbp+88], r11
|
|
|
- ; No store %r13 - %r10
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[13] x A[0]
|
|
|
- mulx rsi, rax, QWORD PTR [r9+104]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, r13
|
|
|
- ; A[14] x A[0]
|
|
|
- mulx rbx, rax, QWORD PTR [r9+112]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, r13
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No store %r15 - %r9
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[15] x A[0]
|
|
|
- mulx r10, rax, QWORD PTR [r9+120]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, r13
|
|
|
- ; No store %rbx - %r10
|
|
|
- ; Carry
|
|
|
- adcx r10, r13
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- ; Diagonal 2
|
|
|
- mov r10, QWORD PTR [rbp+24]
|
|
|
- mov r11, QWORD PTR [rbp+32]
|
|
|
- mov r12, QWORD PTR [rbp+40]
|
|
|
- ; A[2] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+16]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[3] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+24], r10
|
|
|
- mov QWORD PTR [rbp+32], r11
|
|
|
- mov r10, QWORD PTR [rbp+48]
|
|
|
- mov r11, QWORD PTR [rbp+56]
|
|
|
- ; A[4] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[5] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+40], r12
|
|
|
- mov QWORD PTR [rbp+48], r10
|
|
|
- mov r12, QWORD PTR [rbp+64]
|
|
|
- mov r10, QWORD PTR [rbp+72]
|
|
|
- ; A[6] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[7] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbp+56], r11
|
|
|
- mov QWORD PTR [rbp+64], r12
|
|
|
- mov r11, QWORD PTR [rbp+80]
|
|
|
- mov r12, QWORD PTR [rbp+88]
|
|
|
- ; A[8] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+72], r10
|
|
|
- mov QWORD PTR [rbp+80], r11
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[10] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[11] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r12
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r10
|
|
|
- ; A[12] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[13] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No store %r15 - %r10
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[14] x A[1]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] x A[1]
|
|
|
- mulx r12, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[15] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx r10, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- ; Diagonal 3
|
|
|
- mov r10, QWORD PTR [rbp+40]
|
|
|
- mov r11, QWORD PTR [rbp+48]
|
|
|
- mov r12, QWORD PTR [rbp+56]
|
|
|
- ; A[3] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+24]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[4] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+40], r10
|
|
|
- mov QWORD PTR [rbp+48], r11
|
|
|
- mov r10, QWORD PTR [rbp+64]
|
|
|
- mov r11, QWORD PTR [rbp+72]
|
|
|
- ; A[5] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[6] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+56], r12
|
|
|
- mov QWORD PTR [rbp+64], r10
|
|
|
- mov r12, QWORD PTR [rbp+80]
|
|
|
- mov r10, QWORD PTR [rbp+88]
|
|
|
- ; A[7] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[8] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [rbp+72], r11
|
|
|
- mov QWORD PTR [rbp+80], r12
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[9] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r10, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[10] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r10
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[11] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[12] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r10
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[13] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx rbx, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] x A[2]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov QWORD PTR [r8], r12
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[14] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx r12, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[14] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx r10, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+24], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- ; Diagonal 4
|
|
|
- mov r10, QWORD PTR [rbp+56]
|
|
|
- mov r11, QWORD PTR [rbp+64]
|
|
|
- mov r12, QWORD PTR [rbp+72]
|
|
|
- ; A[4] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+32]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[5] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+56], r10
|
|
|
- mov QWORD PTR [rbp+64], r11
|
|
|
- mov r10, QWORD PTR [rbp+80]
|
|
|
- mov r11, QWORD PTR [rbp+88]
|
|
|
- ; A[6] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[7] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [rbp+72], r12
|
|
|
- mov QWORD PTR [rbp+80], r10
|
|
|
- ; No load %r13 - %r10
|
|
|
- ; A[8] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r11, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[9] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r11
|
|
|
- ; No store %r13 - %r10
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[10] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[11] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[12] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[13] x A[3]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r10
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[13] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[13] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx r12, rax, QWORD PTR [r9+104]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[13] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx r10, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- ; Diagonal 5
|
|
|
- mov r10, QWORD PTR [rbp+72]
|
|
|
- mov r11, QWORD PTR [rbp+80]
|
|
|
- mov r12, QWORD PTR [rbp+88]
|
|
|
- ; A[5] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+40]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[6] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [rbp+72], r10
|
|
|
- mov QWORD PTR [rbp+80], r11
|
|
|
- ; No load %r13 - %r8
|
|
|
- ; A[7] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[8] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r12
|
|
|
- ; No store %r13 - %r8
|
|
|
- ; No load %r15 - %r10
|
|
|
- ; A[9] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[10] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r9
|
|
|
- ; No store %r15 - %r10
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[11] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] x A[4]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[12] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[12] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- ; A[12] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[12] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov QWORD PTR [r8+32], r12
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[12] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+96]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[12] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx r12, rax, QWORD PTR [r9+96]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- mov QWORD PTR [r8+48], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[12] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx r10, rax, QWORD PTR [r9+96]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+56], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- ; Diagonal 6
|
|
|
- mov r10, QWORD PTR [rbp+88]
|
|
|
- ; No load %r13 - %r9
|
|
|
- ; A[6] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+48]
|
|
|
- adcx r10, rax
|
|
|
- adox r15, rcx
|
|
|
- ; A[7] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- mov QWORD PTR [rbp+88], r10
|
|
|
- ; No store %r13 - %r9
|
|
|
- ; No load %r15 - %r8
|
|
|
- ; A[8] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[9] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r10
|
|
|
- ; No store %r15 - %r8
|
|
|
- mov r12, QWORD PTR [r8]
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- ; A[10] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx rbx, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[11] x A[5]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; No store %rbx - %r9
|
|
|
- mov QWORD PTR [r8], r12
|
|
|
- mov r11, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [r8+24]
|
|
|
- ; A[11] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[11] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+8], r10
|
|
|
- mov QWORD PTR [r8+16], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[11] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[11] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+24], r12
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov r12, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [r8+56]
|
|
|
- ; A[11] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+88]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[13] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov QWORD PTR [r8+48], r12
|
|
|
- mov r11, QWORD PTR [r8+64]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[13] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+104]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[13] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx r12, rax, QWORD PTR [r9+104]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+56], r10
|
|
|
- mov QWORD PTR [r8+64], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[13] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx r10, rax, QWORD PTR [r9+104]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+72], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+80], r10
|
|
|
- ; Diagonal 7
|
|
|
- ; No load %r15 - %r9
|
|
|
- ; A[7] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+56]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; A[8] x A[6]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rsi, rax
|
|
|
- adox rbx, rcx
|
|
|
- ; No store %r14 - %r8
|
|
|
- ; No store %r15 - %r9
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[9] x A[6]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx rbx, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[10] x A[6]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; No store %rbx - %r10
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov r12, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- ; A[10] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[10] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov QWORD PTR [r8+16], r12
|
|
|
- mov r11, QWORD PTR [r8+32]
|
|
|
- mov r12, QWORD PTR [r8+40]
|
|
|
- ; A[10] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+80]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+24], r10
|
|
|
- mov QWORD PTR [r8+32], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[14] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[14] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+40], r12
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov r12, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- ; A[14] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[14] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov QWORD PTR [r8+64], r12
|
|
|
- mov r11, QWORD PTR [r8+80]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[14] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+112]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[14] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx r12, rax, QWORD PTR [r9+112]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+72], r10
|
|
|
- mov QWORD PTR [r8+80], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[14] x A[13]
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mulx r10, rax, QWORD PTR [r9+112]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+88], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+96], r10
|
|
|
- ; Diagonal 8
|
|
|
- mov r11, QWORD PTR [r8]
|
|
|
- mov r12, QWORD PTR [r8+8]
|
|
|
- ; A[8] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+64]
|
|
|
- adcx rbx, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[9] x A[7]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; No store %rbx - %r8
|
|
|
- mov QWORD PTR [r8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[9] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+72]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+8], r12
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- ; A[15] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov QWORD PTR [r8+32], r12
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- mov r12, QWORD PTR [r8+56]
|
|
|
- ; A[15] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r8+40], r10
|
|
|
- mov QWORD PTR [r8+48], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- ; A[15] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- ; A[15] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- mov QWORD PTR [r8+56], r12
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- mov r12, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- ; A[15] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, rcx
|
|
|
- ; A[15] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, rcx
|
|
|
- mov QWORD PTR [r8+72], r11
|
|
|
- mov QWORD PTR [r8+80], r12
|
|
|
- mov r11, QWORD PTR [r8+96]
|
|
|
- ; Zero into %r10
|
|
|
- ; A[15] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx rcx, rax, QWORD PTR [r9+120]
|
|
|
- adcx r10, rax
|
|
|
- adox r11, rcx
|
|
|
- ; A[15] x A[13]
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mulx r12, rax, QWORD PTR [r9+120]
|
|
|
- adcx r11, rax
|
|
|
- adox r12, r13
|
|
|
- mov QWORD PTR [r8+88], r10
|
|
|
- mov QWORD PTR [r8+96], r11
|
|
|
- ; Zero into %r8
|
|
|
- ; Zero into %r9
|
|
|
- ; A[15] x A[14]
|
|
|
- mov rdx, QWORD PTR [r9+112]
|
|
|
- mulx r10, rax, QWORD PTR [r9+120]
|
|
|
- adcx r12, rax
|
|
|
- adox r10, r13
|
|
|
- mov QWORD PTR [r8+104], r12
|
|
|
- ; Carry
|
|
|
- adcx r10, r14
|
|
|
- mov r14, r13
|
|
|
- adcx r14, r13
|
|
|
- adox r14, r13
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov QWORD PTR [r8+120], r14
|
|
|
- ; Double and Add in A[i] x A[i]
|
|
|
- mov r11, QWORD PTR [rbp+8]
|
|
|
- ; A[0] x A[0]
|
|
|
- mov rdx, QWORD PTR [r9]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- mov QWORD PTR [rbp], rax
|
|
|
- adox r11, r11
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+8], r11
|
|
|
- mov r10, QWORD PTR [rbp+16]
|
|
|
- mov r11, QWORD PTR [rbp+24]
|
|
|
- ; A[1] x A[1]
|
|
|
- mov rdx, QWORD PTR [r9+8]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+16], r10
|
|
|
- mov QWORD PTR [rbp+24], r11
|
|
|
- mov r10, QWORD PTR [rbp+32]
|
|
|
- mov r11, QWORD PTR [rbp+40]
|
|
|
- ; A[2] x A[2]
|
|
|
- mov rdx, QWORD PTR [r9+16]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+32], r10
|
|
|
- mov QWORD PTR [rbp+40], r11
|
|
|
- mov r10, QWORD PTR [rbp+48]
|
|
|
- mov r11, QWORD PTR [rbp+56]
|
|
|
- ; A[3] x A[3]
|
|
|
- mov rdx, QWORD PTR [r9+24]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+48], r10
|
|
|
- mov QWORD PTR [rbp+56], r11
|
|
|
- mov r10, QWORD PTR [rbp+64]
|
|
|
- mov r11, QWORD PTR [rbp+72]
|
|
|
- ; A[4] x A[4]
|
|
|
- mov rdx, QWORD PTR [r9+32]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+64], r10
|
|
|
- mov QWORD PTR [rbp+72], r11
|
|
|
- mov r10, QWORD PTR [rbp+80]
|
|
|
- mov r11, QWORD PTR [rbp+88]
|
|
|
- ; A[5] x A[5]
|
|
|
- mov rdx, QWORD PTR [r9+40]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [rbp+80], r10
|
|
|
- mov QWORD PTR [rbp+88], r11
|
|
|
- ; A[6] x A[6]
|
|
|
- mov rdx, QWORD PTR [r9+48]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r15, r15
|
|
|
- adox rdi, rdi
|
|
|
- adcx r15, rax
|
|
|
- adcx rdi, rcx
|
|
|
- ; A[7] x A[7]
|
|
|
- mov rdx, QWORD PTR [r9+56]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox rsi, rsi
|
|
|
- adox rbx, rbx
|
|
|
- adcx rsi, rax
|
|
|
- adcx rbx, rcx
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- ; A[8] x A[8]
|
|
|
- mov rdx, QWORD PTR [r9+64]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8], r10
|
|
|
- mov QWORD PTR [r8+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- ; A[9] x A[9]
|
|
|
- mov rdx, QWORD PTR [r9+72]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+16], r10
|
|
|
- mov QWORD PTR [r8+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- ; A[10] x A[10]
|
|
|
- mov rdx, QWORD PTR [r9+80]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+32], r10
|
|
|
- mov QWORD PTR [r8+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- ; A[11] x A[11]
|
|
|
- mov rdx, QWORD PTR [r9+88]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+48], r10
|
|
|
- mov QWORD PTR [r8+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- ; A[12] x A[12]
|
|
|
- mov rdx, QWORD PTR [r9+96]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+64], r10
|
|
|
- mov QWORD PTR [r8+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- ; A[13] x A[13]
|
|
|
- mov rdx, QWORD PTR [r9+104]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+80], r10
|
|
|
- mov QWORD PTR [r8+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- ; A[14] x A[14]
|
|
|
- mov rdx, QWORD PTR [r9+112]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+96], r10
|
|
|
- mov QWORD PTR [r8+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- ; A[15] x A[15]
|
|
|
- mov rdx, QWORD PTR [r9+120]
|
|
|
- mulx rcx, rax, rdx
|
|
|
- adox r10, r10
|
|
|
- adox r11, r11
|
|
|
- adcx r10, rax
|
|
|
- adcx r11, rcx
|
|
|
- mov QWORD PTR [r8+112], r10
|
|
|
- mov QWORD PTR [r8+120], r11
|
|
|
- mov QWORD PTR [r8+-32], r15
|
|
|
- mov QWORD PTR [r8+-24], rdi
|
|
|
- mov QWORD PTR [r8+-16], rsi
|
|
|
- mov QWORD PTR [r8+-8], rbx
|
|
|
- sub r8, 128
|
|
|
- cmp r9, r8
|
|
|
- jne L_end_1024_sqr_avx2_16
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp]
|
|
|
- vmovups OWORD PTR [r8], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+16]
|
|
|
- vmovups OWORD PTR [r8+16], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+32]
|
|
|
- vmovups OWORD PTR [r8+32], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+48]
|
|
|
- vmovups OWORD PTR [r8+48], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+64]
|
|
|
- vmovups OWORD PTR [r8+64], xmm0
|
|
|
- vmovdqu xmm0, OWORD PTR [rbp+80]
|
|
|
- vmovups OWORD PTR [r8+80], xmm0
|
|
|
-L_end_1024_sqr_avx2_16:
|
|
|
- add rsp, 128
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- pop rbp
|
|
|
- ret
|
|
|
-sp_1024_sqr_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Add b to a into r. (r = a + b)
|
|
|
-; * Each operand is 16 words of 64 bits; rax returns the carry out of the top word (0 or 1).
|
|
|
-; * r A single precision integer that receives the sum (pointer in rcx).
|
|
|
-; * a A single precision integer, first addend (pointer in rdx).
|
|
|
-; * b A single precision integer, second addend (pointer in r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_add_16 PROC
|
|
|
- ; Add word by word from least significant to most significant, carrying through CF
|
|
|
- mov r9, QWORD PTR [rdx] ; r9 = a[0]
|
|
|
- xor rax, rax ; rax = 0 so the final adc leaves only the carry in it
|
|
|
- add r9, QWORD PTR [r8] ; first word uses add, which starts a fresh carry in CF
|
|
|
- mov r10, QWORD PTR [rdx+8] ; r9 and r10 alternate as the working word
|
|
|
- mov QWORD PTR [rcx], r9 ; mov leaves flags alone, so the carry survives each load and store
|
|
|
- adc r10, QWORD PTR [r8+8] ; every later word uses adc to consume and produce CF
|
|
|
- mov r9, QWORD PTR [rdx+16]
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- adc r9, QWORD PTR [r8+16]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx+16], r9
|
|
|
- adc r10, QWORD PTR [r8+24]
|
|
|
- mov r9, QWORD PTR [rdx+32]
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r9, QWORD PTR [r8+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov QWORD PTR [rcx+32], r9
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- mov r9, QWORD PTR [rdx+48]
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- adc r9, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+48], r9
|
|
|
- adc r10, QWORD PTR [r8+56]
|
|
|
- mov r9, QWORD PTR [rdx+64]
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- adc r9, QWORD PTR [r8+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov QWORD PTR [rcx+64], r9
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- mov r9, QWORD PTR [rdx+80]
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r9, QWORD PTR [r8+80]
|
|
|
- mov r10, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+80], r9
|
|
|
- adc r10, QWORD PTR [r8+88]
|
|
|
- mov r9, QWORD PTR [rdx+96]
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- adc r9, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov QWORD PTR [rcx+96], r9
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- mov r9, QWORD PTR [rdx+112]
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- adc r9, QWORD PTR [r8+112]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+112], r9
|
|
|
- adc r10, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [rcx+120], r10 ; top word r[15]
|
|
|
- adc rax, 0 ; rax = carry out of the top word
|
|
|
- ret
|
|
|
-sp_1024_add_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Sub b from a into a. (a -= b)
|
|
|
-; * Each operand is 16 words of 64 bits; rax returns 0 when there is no borrow out of the top word and -1 when there is.
|
|
|
-; * a A single precision integer and result (pointer in rcx).
|
|
|
-; * b A single precision integer to subtract (pointer in rdx).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_sub_in_place_16 PROC
|
|
|
- mov r8, QWORD PTR [rcx] ; r8 = a[0]; r8 and r9 alternate as the working word
|
|
|
- sub r8, QWORD PTR [rdx] ; first word uses sub, which starts a fresh borrow in CF
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- mov QWORD PTR [rcx], r8 ; mov leaves flags alone, so the borrow survives each load and store
|
|
|
- sbb r9, QWORD PTR [rdx+8] ; every later word uses sbb to consume and produce CF
|
|
|
- mov r8, QWORD PTR [rcx+16]
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- sbb r8, QWORD PTR [rdx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- mov QWORD PTR [rcx+16], r8
|
|
|
- sbb r9, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rcx+32]
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- sbb r8, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov QWORD PTR [rcx+32], r8
|
|
|
- sbb r9, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rcx+48]
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- sbb r8, QWORD PTR [rdx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- mov QWORD PTR [rcx+48], r8
|
|
|
- sbb r9, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rcx+64]
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- sbb r8, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov QWORD PTR [rcx+64], r8
|
|
|
- sbb r9, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rcx+80]
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- sbb r8, QWORD PTR [rdx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- mov QWORD PTR [rcx+80], r8
|
|
|
- sbb r9, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rcx+96]
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- sbb r8, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov QWORD PTR [rcx+96], r8
|
|
|
- sbb r9, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rcx+112]
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- sbb r8, QWORD PTR [rdx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx+112], r8
|
|
|
- sbb r9, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+120], r9 ; top word a[15]
|
|
|
- sbb rax, rax ; rax = 0 - CF: 0 without a final borrow, all ones with one
|
|
|
- ret
|
|
|
-sp_1024_sub_in_place_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract b and 0 to subtract nothing, in which case r receives a copy of a.
|
|
|
-; * Each operand is 16 words of 64 bits; rax returns 0 when there is no borrow out of the top word and -1 when there is.
|
|
|
-; * r A single precision number representing condition subtract result (pointer in rcx).
|
|
|
-; * a A single precision number to subtract from (pointer in rdx).
|
|
|
-; * b A single precision number to subtract (pointer in r8).
|
|
|
-; * m Mask value to apply (value in r9).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_cond_sub_16 PROC
|
|
|
- sub rsp, 128 ; 128 bytes of stack scratch for the 16 masked words of b
|
|
|
- mov r10, QWORD PTR [r8] ; pass 1: copy b and m, two words at a time, onto the stack
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- and r10, r9 ; and with the mask keeps the word for m = -1 and zeroes it for m = 0
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov QWORD PTR [rsp+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- and r10, r9
|
|
|
- and r11, r9
|
|
|
- mov QWORD PTR [rsp+112], r10
|
|
|
- mov QWORD PTR [rsp+120], r11
|
|
|
- mov r10, QWORD PTR [rdx] ; pass 2: r = a - masked b; r10 and r11 alternate as the working word
|
|
|
- mov r8, QWORD PTR [rsp] ; the b pointer is no longer needed, so r8 is reused for the masked word
|
|
|
- sub r10, r8 ; first word uses sub, which starts a fresh borrow in CF
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- mov r8, QWORD PTR [rsp+8]
|
|
|
- sbb r11, r8 ; every later word uses sbb; only mov sits between them, so CF is preserved
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r8, QWORD PTR [rsp+16]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- mov r8, QWORD PTR [rsp+24]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov r10, QWORD PTR [rdx+32]
|
|
|
- mov r8, QWORD PTR [rsp+32]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov r11, QWORD PTR [rdx+40]
|
|
|
- mov r8, QWORD PTR [rsp+40]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r8, QWORD PTR [rsp+48]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- mov r8, QWORD PTR [rsp+56]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov r8, QWORD PTR [rsp+64]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov r11, QWORD PTR [rdx+72]
|
|
|
- mov r8, QWORD PTR [rsp+72]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+64], r10
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r8, QWORD PTR [rsp+80]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+72], r11
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- mov r8, QWORD PTR [rsp+88]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- mov r8, QWORD PTR [rsp+96]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- mov r8, QWORD PTR [rsp+104]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r8, QWORD PTR [rsp+112]
|
|
|
- sbb r10, r8
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- mov r8, QWORD PTR [rsp+120]
|
|
|
- sbb r11, r8
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11 ; top word r[15]
|
|
|
- sbb rax, rax ; rax = 0 - CF, taken before add rsp below overwrites the flags
|
|
|
- add rsp, 128 ; release the scratch area
|
|
|
- ret
|
|
|
-sp_1024_cond_sub_16 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Conditionally subtract b from a using the mask m.
|
|
|
-; * m is -1 to subtract b and 0 to subtract nothing, in which case r receives a copy of a.
|
|
|
-; * Each operand is 16 words of 64 bits; rax returns 0 when there is no borrow out of the top word and -1 when there is.
|
|
|
-; * r A single precision number representing condition subtract result (pointer in rcx).
|
|
|
-; * a A single precision number to subtract from (pointer in rdx).
|
|
|
-; * b A single precision number to subtract (pointer in r8).
|
|
|
-; * m Mask value to apply (value in r9); it must be exactly 0 or -1 because pext is used for the masking.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_cond_sub_avx2_16 PROC
|
|
|
- push r12 ; r12 is used as a third working register and restored before ret
|
|
|
- mov r12, QWORD PTR [r8] ; r12 = b[0]
|
|
|
- mov r10, QWORD PTR [rdx] ; r10 = a[0]
|
|
|
- pext r12, r12, r9 ; mask -1 returns the word unchanged, mask 0 returns 0; other masks would compact bits
|
|
|
- sub r10, r12 ; first word uses sub, which starts a fresh borrow in CF
|
|
|
- mov r12, QWORD PTR [r8+8] ; from here r10, r11 and r12 rotate between the b word, the a word and the pending store
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext r12, r12, r9 ; pext and mov leave the flags alone, so the borrow survives between sbb steps
|
|
|
- mov QWORD PTR [rcx], r10 ; the previous result word is stored while the next one is prepared
|
|
|
- sbb r11, r12 ; every later word uses sbb to consume and produce CF
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+24]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [rdx+32]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+48]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+56]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+96]
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- sbb r10, r11
|
|
|
- mov r12, QWORD PTR [r8+104]
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- pext r12, r12, r9
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- sbb r11, r12
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- pext r10, r10, r9
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- sbb r12, r10
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- mov r10, QWORD PTR [rdx+120]
|
|
|
- pext r11, r11, r9
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- sbb r10, r11
|
|
|
- mov QWORD PTR [rcx+120], r10 ; top word r[15]
|
|
|
- sbb rax, rax ; rax = 0 - CF: 0 without a final borrow, all ones with one
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_cond_sub_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; * a is 16 words of 64 bits; the product is 17 words, so r must have room for r[0] through r[16].
|
|
|
-; * r A single precision integer that receives the product (pointer in rcx).
|
|
|
-; * a A single precision integer (pointer in rdx, copied to r9 on entry).
|
|
|
-; * b A single precision digit (value in r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mul_d_16 PROC
|
|
|
- push r12 ; r12 is the third accumulator register and is restored before ret
|
|
|
- mov r9, rdx ; mul overwrites rdx with the high half of each product, so keep the a pointer in r9
|
|
|
- ; A[0] * B
|
|
|
- mov rax, r8 ; mul multiplies rax by its operand and leaves the product in rdx:rax
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- mov r10, rax ; low half is r[0]
|
|
|
- mov r11, rdx ; high half carries into the next column
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- ; A[1] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10 ; r10, r11 and r12 rotate roles each step; the register just stored is zeroed for reuse
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- add r11, rax ; low half completes the current column
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- adc r12, rdx ; high half plus carry goes to the next column
|
|
|
- adc r10, 0 ; and any carry out of that goes to the column after it
|
|
|
- ; A[2] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+16], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[3] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[4] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[5] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[6] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[7] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[8] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+64], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[9] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[10] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[11] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[12] * B
|
|
|
- mov rax, r8
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- add r10, rax
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- adc r11, rdx
|
|
|
- adc r12, 0
|
|
|
- ; A[13] * B
|
|
|
- mov rax, r8
|
|
|
- xor r10, r10
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- add r11, rax
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- adc r12, rdx
|
|
|
- adc r10, 0
|
|
|
- ; A[14] * B
|
|
|
- mov rax, r8
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- add r12, rax
|
|
|
- mov QWORD PTR [rcx+112], r12
|
|
|
- adc r10, rdx
|
|
|
- adc r11, 0
|
|
|
- ; A[15] * B
|
|
|
- mov rax, r8
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- add r10, rax
|
|
|
- adc r11, rdx ; last step keeps only two columns, r[15] and r[16]
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- mov QWORD PTR [rcx+128], r11 ; seventeenth word, r[16]
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mul_d_16 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Mul a by digit b into r. (r = a * b)
|
|
|
-; * a is 16 words of 64 bits; the product is 17 words, so r must have room for r[0] through r[16]. Uses mulx, adcx and adox.
|
|
|
-; * r A single precision integer that receives the product (pointer in rcx).
|
|
|
-; * a A single precision integer (pointer in rdx, copied to rax on entry).
|
|
|
-; * b A single precision digit (value in r8, copied to rdx as the implicit mulx operand).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mul_d_avx2_16 PROC
|
|
|
- push r12 ; r12 and r13 are used below and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, rdx ; keep the a pointer in rax because rdx must hold the multiplier for mulx
|
|
|
- ; A[0] * B
|
|
|
- mov rdx, r8 ; rdx = b, the implicit source operand of every mulx below
|
|
|
- xor r13, r13 ; r13 = 0 for the rest of the routine; xor also clears CF and OF for the two carry chains
|
|
|
- mulx r12, r11, QWORD PTR [rax] ; r12:r11 = a[0] * b; mulx does not modify the flags
|
|
|
- mov QWORD PTR [rcx], r11
|
|
|
- ; A[1] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+8] ; r10:r9 = high:low halves of the product
|
|
|
- mov r11, r13 ; zero the accumulator for the next column; r11 and r12 alternate
|
|
|
- adcx r12, r9 ; low half joins the current column on the CF chain
|
|
|
- adox r11, r10 ; high half starts the next column on the OF chain
|
|
|
- mov QWORD PTR [rcx+8], r12
|
|
|
- ; A[2] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+16]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- ; A[3] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+24]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- ; A[4] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+32]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+32], r11
|
|
|
- ; A[5] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+40]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+40], r12
|
|
|
- ; A[6] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+48]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- ; A[7] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+56]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- ; A[8] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+64]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+64], r11
|
|
|
- ; A[9] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+72]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+72], r12
|
|
|
- ; A[10] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+80]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- ; A[11] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+88]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- ; A[12] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+96]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+96], r11
|
|
|
- ; A[13] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+104]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- mov QWORD PTR [rcx+104], r12
|
|
|
- ; A[14] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+112]
|
|
|
- mov r12, r13
|
|
|
- adcx r11, r9
|
|
|
- adox r12, r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- ; A[15] * B
|
|
|
- mulx r10, r9, QWORD PTR [rax+120]
|
|
|
- mov r11, r13
|
|
|
- adcx r12, r9
|
|
|
- adox r11, r10
|
|
|
- adcx r11, r13 ; adding zero folds the outstanding CF carry into the top word
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- mov QWORD PTR [rcx+128], r11 ; seventeenth word, r[16]
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mul_d_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF _WIN64
|
|
|
-; /* Divide the double width number (d1|d0) by the divisor. (d1|d0 / div)
|
|
|
-; *
|
|
|
-; * d1 The high order half of the number to divide. (rcx)
|
|
|
-; * d0 The low order half of the number to divide. (rdx)
|
|
|
-; * div The divisor. (r8)
|
|
|
-; * returns the quotient of the division in rax; rdx is left holding the remainder.
|
|
|
-; *
|
|
|
-; * The caller must guarantee div != 0 and d1 < div: DIV raises a divide
|
|
|
-; * error (#DE) when the quotient does not fit in 64 bits.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-div_1024_word_asm_16 PROC
|
|
|
- mov rax, rdx ; rax = d0 (low half of the dividend)
|
|
|
- mov rdx, rcx ; rdx = d1 (high half of the dividend)
|
|
|
- div r8 ; rax = rdx:rax / div
|
|
|
- ret
|
|
|
-div_1024_word_asm_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Compare a with b in constant time. All 16 words of both inputs are always read, from offset 120 down to offset 0, and there are no branches.
|
|
|
-; * Argument registers as used below: rcx = a, rdx = b. Result is returned in rax. r8-r11 are overwritten; r12 is saved and restored.
|
|
|
-; * a A single precision integer.
|
|
|
-; * b A single precision integer.
|
|
|
-; * return -ve, 0 or +ve if a is less than, equal to or greater than b
|
|
|
-; * respectively. (The values produced below are -1, 0 and 1.)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_cmp_16 PROC
|
|
|
- push r12
|
|
|
- xor r9, r9 ; r9 = 0, used to clear the mask
|
|
|
- mov r8, -1 ; r8 = mask: all ones while every word compared so far was equal
|
|
|
- mov rax, -1 ; rax = result; a leftover -1 with the mask still set means "equal" and is turned into 0 by the final XOR
|
|
|
- mov r10, 1 ; r10 = 1, the "a > b" result
|
|
|
- mov r11, QWORD PTR [rcx+120]
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- and r11, r8 ; once a difference has been found the mask is 0, so both words become 0 and compare equal
|
|
|
- and r12, r8
|
|
|
- sub r11, r12 ; CF = word of a below word of b, ZF = words equal
|
|
|
- cmova rax, r10 ; word of a above word of b: result = 1
|
|
|
- cmovc rax, r8 ; word of a below word of b: result = -1 (the mask is still all ones when this can fire)
|
|
|
- cmovnz r8, r9 ; words differ: clear the mask so the remaining words cannot change the result
|
|
|
- mov r11, QWORD PTR [rcx+112]
|
|
|
- mov r12, QWORD PTR [rdx+112]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+104]
|
|
|
- mov r12, QWORD PTR [rdx+104]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+96]
|
|
|
- mov r12, QWORD PTR [rdx+96]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+88]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+80]
|
|
|
- mov r12, QWORD PTR [rdx+80]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+72]
|
|
|
- mov r12, QWORD PTR [rdx+72]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+64]
|
|
|
- mov r12, QWORD PTR [rdx+64]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+48]
|
|
|
- mov r12, QWORD PTR [rdx+48]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+40]
|
|
|
- mov r12, QWORD PTR [rdx+40]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+32]
|
|
|
- mov r12, QWORD PTR [rdx+32]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+16]
|
|
|
- mov r12, QWORD PTR [rdx+16]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx+8]
|
|
|
- mov r12, QWORD PTR [rdx+8]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- mov r11, QWORD PTR [rcx]
|
|
|
- mov r12, QWORD PTR [rdx]
|
|
|
- and r11, r8
|
|
|
- and r12, r8
|
|
|
- sub r11, r12
|
|
|
- cmova rax, r10
|
|
|
- cmovc rax, r8
|
|
|
- cmovnz r8, r9
|
|
|
- xor rax, r8 ; all words equal: -1 xor -1 = 0; otherwise the mask is 0 and rax is returned unchanged
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_cmp_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Branch-free conditional copy of the 16 words of a into r.
|
|
|
-; * Every word is updated as r[i] ^= (a[i] ^ r[i]) & m, so with m = -1
|
|
|
-; * r becomes a copy of a and with m = 0 r is rewritten unchanged.
|
|
|
-; *
|
|
|
-; * r A single precision number to copy over. (rcx)
|
|
|
-; * a A single precision number to copy. (rdx)
|
|
|
-; * m Mask value to apply. (r8)
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_cond_copy_16 PROC
|
|
|
- mov rax, QWORD PTR [rdx] ; word of a
|
|
|
- xor rax, QWORD PTR [rcx] ; bits where a and r differ
|
|
|
- and rax, r8 ; keep them only when the mask is set
|
|
|
- xor QWORD PTR [rcx], rax ; flip those bits in r
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- xor r9, QWORD PTR [rcx+8]
|
|
|
- and r9, r8
|
|
|
- xor QWORD PTR [rcx+8], r9
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- xor r10, QWORD PTR [rcx+16]
|
|
|
- and r10, r8
|
|
|
- xor QWORD PTR [rcx+16], r10
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- xor r11, QWORD PTR [rcx+24]
|
|
|
- and r11, r8
|
|
|
- xor QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- xor rax, QWORD PTR [rcx+32]
|
|
|
- and rax, r8
|
|
|
- xor QWORD PTR [rcx+32], rax
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- xor r9, QWORD PTR [rcx+40]
|
|
|
- and r9, r8
|
|
|
- xor QWORD PTR [rcx+40], r9
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- xor r10, QWORD PTR [rcx+48]
|
|
|
- and r10, r8
|
|
|
- xor QWORD PTR [rcx+48], r10
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- xor r11, QWORD PTR [rcx+56]
|
|
|
- and r11, r8
|
|
|
- xor QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- xor rax, QWORD PTR [rcx+64]
|
|
|
- and rax, r8
|
|
|
- xor QWORD PTR [rcx+64], rax
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- xor r9, QWORD PTR [rcx+72]
|
|
|
- and r9, r8
|
|
|
- xor QWORD PTR [rcx+72], r9
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- xor r10, QWORD PTR [rcx+80]
|
|
|
- and r10, r8
|
|
|
- xor QWORD PTR [rcx+80], r10
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- xor r11, QWORD PTR [rcx+88]
|
|
|
- and r11, r8
|
|
|
- xor QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- xor rax, QWORD PTR [rcx+96]
|
|
|
- and rax, r8
|
|
|
- xor QWORD PTR [rcx+96], rax
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- xor r9, QWORD PTR [rcx+104]
|
|
|
- and r9, r8
|
|
|
- xor QWORD PTR [rcx+104], r9
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- xor r10, QWORD PTR [rcx+112]
|
|
|
- and r10, r8
|
|
|
- xor QWORD PTR [rcx+112], r10
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- xor r11, QWORD PTR [rcx+120]
|
|
|
- and r11, r8
|
|
|
- xor QWORD PTR [rcx+120], r11
|
|
|
- ret
|
|
|
-sp_1024_cond_copy_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Reduce the number back to 1024 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place. (rcx) Words at
|
|
|
-; * offsets 0..255 are accessed; the result is written by the final
|
|
|
-; * call with a (offset 0) as its destination.
|
|
|
-; * m The single precision number representing the modulus. (rdx)
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n. (r8)
|
|
|
-; *
|
|
|
-; * Register use: r9 = m, r10 = loop counter, r13 = mu, r11/r12 = running
|
|
|
-; * carry words, r14 = scratch word, r15/rdi = the two lowest words of the
|
|
|
-; * current window held in registers, rsi = carry out of the top word of
|
|
|
-; * the previous round. r12-r15, rdi and rsi are saved and restored.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_reduce_16 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- mov r9, rdx ; r9 = m; rdx is overwritten by every MUL
|
|
|
- xor rsi, rsi ; no carry from a previous round yet
|
|
|
- ; i = 16
|
|
|
- mov r10, 16
|
|
|
- mov r15, QWORD PTR [rcx] ; the two lowest words are carried in registers between rounds
|
|
|
- mov rdi, QWORD PTR [rcx+8]
|
|
|
-L_1024_mont_reduce_16_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov r13, r15
|
|
|
- imul r13, r8
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9]
|
|
|
- add r15, rax ; only the carry is needed; the sum is overwritten below and never stored
|
|
|
- adc r12, rdx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+8]
|
|
|
- mov r15, rdi ; this word becomes the lowest word of the next round
|
|
|
- add r15, rax
|
|
|
- adc r11, rdx
|
|
|
- add r15, r12
|
|
|
- adc r11, 0
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+16]
|
|
|
- mov rdi, QWORD PTR [rcx+16] ; kept in rdi for the next round instead of being stored
|
|
|
- add rdi, rax
|
|
|
- adc r12, rdx
|
|
|
- add rdi, r11
|
|
|
- adc r12, 0
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+24]
|
|
|
- mov r14, QWORD PTR [rcx+24]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+24], r14 ; MOV leaves CF intact for the ADC that follows
|
|
|
- adc r11, 0
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+32]
|
|
|
- mov r14, QWORD PTR [rcx+32]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+32], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+40]
|
|
|
- mov r14, QWORD PTR [rcx+40]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+40], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+48]
|
|
|
- mov r14, QWORD PTR [rcx+48]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+48], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+56]
|
|
|
- mov r14, QWORD PTR [rcx+56]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+56], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+64]
|
|
|
- mov r14, QWORD PTR [rcx+64]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+64], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+72]
|
|
|
- mov r14, QWORD PTR [rcx+72]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+72], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+80]
|
|
|
- mov r14, QWORD PTR [rcx+80]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+80], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+88]
|
|
|
- mov r14, QWORD PTR [rcx+88]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+88], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+96]
|
|
|
- mov r14, QWORD PTR [rcx+96]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+96], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r11, r11
|
|
|
- mul QWORD PTR [r9+104]
|
|
|
- mov r14, QWORD PTR [rcx+104]
|
|
|
- add r14, rax
|
|
|
- adc r11, rdx
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+104], r14
|
|
|
- adc r11, 0
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mov rax, r13
|
|
|
- xor r12, r12
|
|
|
- mul QWORD PTR [r9+112]
|
|
|
- mov r14, QWORD PTR [rcx+112]
|
|
|
- add r14, rax
|
|
|
- adc r12, rdx
|
|
|
- add r14, r11
|
|
|
- mov QWORD PTR [rcx+112], r14
|
|
|
- adc r12, 0
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mov rax, r13
|
|
|
- mul QWORD PTR [r9+120]
|
|
|
- mov r14, QWORD PTR [rcx+120]
|
|
|
- add r12, rax
|
|
|
- adc rdx, rsi ; add the carry left over from the previous round
|
|
|
- mov rsi, 0 ; must be MOV, not XOR: CF from the ADC above is consumed next
|
|
|
- adc rsi, 0
|
|
|
- add r14, r12
|
|
|
- mov QWORD PTR [rcx+120], r14
|
|
|
- adc QWORD PTR [rcx+128], rdx
|
|
|
- adc rsi, 0 ; rsi = carry out of the top word, used by the next round / final mask
|
|
|
- ; i -= 1
|
|
|
- add rcx, 8
|
|
|
- dec r10
|
|
|
- jnz L_1024_mont_reduce_16_loop
|
|
|
- ; rcx has advanced by 128 bytes and now addresses the upper 16 words.
|
|
|
- ; Flush the two cached words and build the subtraction mask in rsi:
|
|
|
- ; all ones if the last round carried out, or top word >= m[15].
|
|
|
- mov r14, QWORD PTR [rcx+120]
|
|
|
- mov QWORD PTR [rcx], r15
|
|
|
- sub r14, QWORD PTR [r9+120]
|
|
|
- mov QWORD PTR [rcx+8], rdi ; MOV keeps the borrow from SUB for the SBB below
|
|
|
- sbb r14, r14 ; -1 if top word < m[15], else 0
|
|
|
- neg rsi ; carry 0/1 -> 0/-1
|
|
|
- not r14 ; -1 if top word >= m[15]
|
|
|
- or rsi, r14
|
|
|
- ; Arguments for sp_1024_cond_sub_16: rcx = a (original pointer),
|
|
|
- ; rdx = upper 16 words, r8 = m, r9 = mask. m must be copied to r8
|
|
|
- ; before r9 is overwritten with the mask.
|
|
|
- ; NOTE(review): presumably the callee stores (upper words - (m & mask))
|
|
|
- ; at rcx - confirm against sp_1024_cond_sub_16.
|
|
|
- mov r8, r9
|
|
|
- mov r9, rsi
|
|
|
- mov rdx, rcx
|
|
|
- sub rcx, 128
|
|
|
- ; Win64 call: reserve 32 bytes of shadow space plus 8 bytes of padding
|
|
|
- ; so that rsp is 16-byte aligned at the call (six pushes leave it at 8 mod 16).
|
|
|
- sub rsp, 40
|
|
|
- call sp_1024_cond_sub_16
|
|
|
- add rsp, 40
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_reduce_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Add two Montgomery form numbers (r = a + b % m). The 16-word sum is stored to r, then (m AND mask) is subtracted from it; the mask is computed without branching.
|
|
|
-; * Argument registers as used below: rcx = r, rdx = a, r8 = b, r9 = m. rax, r10 and r11 are overwritten; r12 and r13 are saved and restored; 128 bytes of stack hold the masked copy of m.
|
|
|
-; * r Result of addition.
|
|
|
-; * a First number to add in Montgomery form.
|
|
|
-; * b Second number to add in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_add_16 PROC
|
|
|
- push r12
|
|
|
- push r13
|
|
|
- sub rsp, 128 ; scratch area for m AND mask
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov r11, QWORD PTR [rdx+16]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- add rax, QWORD PTR [r8] ; starts the carry chain; the ADCs continue it across all 16 words
|
|
|
- mov r13, 0 ; MOV rather than XOR so the carry flag is not disturbed
|
|
|
- adc r10, QWORD PTR [r8+8]
|
|
|
- adc r11, QWORD PTR [r8+16]
|
|
|
- adc r12, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov r11, QWORD PTR [rdx+48]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- adc rax, QWORD PTR [r8+32]
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- adc r11, QWORD PTR [r8+48]
|
|
|
- adc r12, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- adc rax, QWORD PTR [r8+64]
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- adc r11, QWORD PTR [r8+80]
|
|
|
- adc r12, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov r11, QWORD PTR [rdx+112]
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- adc rax, QWORD PTR [r8+96]
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- adc r11, QWORD PTR [r8+112]
|
|
|
- adc r12, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- sbb r13, 0 ; r13 = -1 if the addition carried out of the top word, else 0 (the stores above do not touch CF)
|
|
|
- sub r12, QWORD PTR [r9+120] ; compare the top word of the sum with the top word of m
|
|
|
- sbb r12, r12 ; -1 if top word < m[15], else 0
|
|
|
- not r12 ; -1 if top word >= m[15]
|
|
|
- or r13, r12 ; r13 = mask: all ones if carry out or top word >= m[15]
|
|
|
- mov r11, QWORD PTR [r9]
|
|
|
- mov r12, QWORD PTR [r9+8]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp], r11 ; stack scratch = m AND mask (either m or all zero words)
|
|
|
- mov QWORD PTR [rsp+8], r12
|
|
|
- mov r11, QWORD PTR [r9+16]
|
|
|
- mov r12, QWORD PTR [r9+24]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+16], r11
|
|
|
- mov QWORD PTR [rsp+24], r12
|
|
|
- mov r11, QWORD PTR [r9+32]
|
|
|
- mov r12, QWORD PTR [r9+40]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+32], r11
|
|
|
- mov QWORD PTR [rsp+40], r12
|
|
|
- mov r11, QWORD PTR [r9+48]
|
|
|
- mov r12, QWORD PTR [r9+56]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+48], r11
|
|
|
- mov QWORD PTR [rsp+56], r12
|
|
|
- mov r11, QWORD PTR [r9+64]
|
|
|
- mov r12, QWORD PTR [r9+72]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+64], r11
|
|
|
- mov QWORD PTR [rsp+72], r12
|
|
|
- mov r11, QWORD PTR [r9+80]
|
|
|
- mov r12, QWORD PTR [r9+88]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+80], r11
|
|
|
- mov QWORD PTR [rsp+88], r12
|
|
|
- mov r11, QWORD PTR [r9+96]
|
|
|
- mov r12, QWORD PTR [r9+104]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+96], r11
|
|
|
- mov QWORD PTR [rsp+104], r12
|
|
|
- mov r11, QWORD PTR [r9+112]
|
|
|
- mov r12, QWORD PTR [r9+120]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+112], r11
|
|
|
- mov QWORD PTR [rsp+120], r12
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r10, QWORD PTR [rcx+8]
|
|
|
- sub rax, QWORD PTR [rsp] ; r -= (m AND mask); the SBBs carry the borrow through all 16 words
|
|
|
- sbb r10, QWORD PTR [rsp+8]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r10, QWORD PTR [rcx+24]
|
|
|
- sbb rax, QWORD PTR [rsp+16]
|
|
|
- sbb r10, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [rcx+40]
|
|
|
- sbb rax, QWORD PTR [rsp+32]
|
|
|
- sbb r10, QWORD PTR [rsp+40]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r10, QWORD PTR [rcx+56]
|
|
|
- sbb rax, QWORD PTR [rsp+48]
|
|
|
- sbb r10, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r10, QWORD PTR [rcx+72]
|
|
|
- sbb rax, QWORD PTR [rsp+64]
|
|
|
- sbb r10, QWORD PTR [rsp+72]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [rcx+88]
|
|
|
- sbb rax, QWORD PTR [rsp+80]
|
|
|
- sbb r10, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r10, QWORD PTR [rcx+104]
|
|
|
- sbb rax, QWORD PTR [rsp+96]
|
|
|
- sbb r10, QWORD PTR [rsp+104]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r10, QWORD PTR [rcx+120]
|
|
|
- sbb rax, QWORD PTR [rsp+112]
|
|
|
- sbb r10, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- add rsp, 128
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_add_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Double a Montgomery form number (r = a + a % m). The 16-word sum is stored to r, then (m AND mask) is subtracted from it; the mask is computed without branching.
|
|
|
-; * Argument registers as used below: rcx = r, rdx = a, r8 = m. rax, r9, r10 and r11 are overwritten; r12 is saved and restored; 128 bytes of stack hold the masked copy of m.
|
|
|
-; * r Result of addition.
|
|
|
-; * a Number to double in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_dbl_16 PROC
|
|
|
- push r12
|
|
|
- sub rsp, 128 ; scratch area for m AND mask
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- add rax, QWORD PTR [rdx] ; a + a; starts the carry chain continued by the ADCs across all 16 words
|
|
|
- mov r12, 0 ; MOV rather than XOR so the carry flag is not disturbed
|
|
|
- adc r9, QWORD PTR [rdx+8]
|
|
|
- adc r10, QWORD PTR [rdx+16]
|
|
|
- adc r11, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- adc rax, QWORD PTR [rdx+32]
|
|
|
- adc r9, QWORD PTR [rdx+40]
|
|
|
- adc r10, QWORD PTR [rdx+48]
|
|
|
- adc r11, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- adc rax, QWORD PTR [rdx+64]
|
|
|
- adc r9, QWORD PTR [rdx+72]
|
|
|
- adc r10, QWORD PTR [rdx+80]
|
|
|
- adc r11, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- adc rax, QWORD PTR [rdx+96]
|
|
|
- adc r9, QWORD PTR [rdx+104]
|
|
|
- adc r10, QWORD PTR [rdx+112]
|
|
|
- adc r11, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- sbb r12, 0 ; r12 = -1 if the addition carried out of the top word, else 0 (the stores above do not touch CF)
|
|
|
- sub r11, QWORD PTR [r8+120] ; compare the top word of the sum with the top word of m
|
|
|
- sbb r11, r11 ; -1 if top word < m[15], else 0
|
|
|
- not r11 ; -1 if top word >= m[15]
|
|
|
- or r12, r11 ; r12 = mask: all ones if carry out or top word >= m[15]
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp], r10 ; stack scratch = m AND mask (either m or all zero words)
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov QWORD PTR [rsp+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+112], r10
|
|
|
- mov QWORD PTR [rsp+120], r11
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- sub rax, QWORD PTR [rsp] ; r -= (m AND mask); the SBBs carry the borrow through all 16 words
|
|
|
- sbb r9, QWORD PTR [rsp+8]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- sbb rax, QWORD PTR [rsp+16]
|
|
|
- sbb r9, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- sbb rax, QWORD PTR [rsp+32]
|
|
|
- sbb r9, QWORD PTR [rsp+40]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- sbb rax, QWORD PTR [rsp+48]
|
|
|
- sbb r9, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- sbb rax, QWORD PTR [rsp+64]
|
|
|
- sbb r9, QWORD PTR [rsp+72]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- sbb rax, QWORD PTR [rsp+80]
|
|
|
- sbb r9, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- sbb rax, QWORD PTR [rsp+96]
|
|
|
- sbb r9, QWORD PTR [rsp+104]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- sbb rax, QWORD PTR [rsp+112]
|
|
|
- sbb r9, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- add rsp, 128
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_dbl_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Triple a Montgomery form number (r = a + a + a % m).
|
|
|
-; *
|
|
|
-; * r Result of addition.
|
|
|
-; * a Number to triple in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_tpl_16 PROC
|
|
|
- push r12
|
|
|
- sub rsp, 128
|
|
|
- mov rax, QWORD PTR [rdx]
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- add rax, QWORD PTR [rdx]
|
|
|
- mov r12, 0
|
|
|
- adc r9, QWORD PTR [rdx+8]
|
|
|
- adc r10, QWORD PTR [rdx+16]
|
|
|
- adc r11, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- adc rax, QWORD PTR [rdx+32]
|
|
|
- adc r9, QWORD PTR [rdx+40]
|
|
|
- adc r10, QWORD PTR [rdx+48]
|
|
|
- adc r11, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- adc rax, QWORD PTR [rdx+64]
|
|
|
- adc r9, QWORD PTR [rdx+72]
|
|
|
- adc r10, QWORD PTR [rdx+80]
|
|
|
- adc r11, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- adc rax, QWORD PTR [rdx+96]
|
|
|
- adc r9, QWORD PTR [rdx+104]
|
|
|
- adc r10, QWORD PTR [rdx+112]
|
|
|
- adc r11, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- sbb r12, 0
|
|
|
- sub r11, QWORD PTR [r8+120]
|
|
|
- sbb r11, r11
|
|
|
- not r11
|
|
|
- or r12, r11
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov QWORD PTR [rsp+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+112], r10
|
|
|
- mov QWORD PTR [rsp+120], r11
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- sub rax, QWORD PTR [rsp]
|
|
|
- sbb r9, QWORD PTR [rsp+8]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- sbb rax, QWORD PTR [rsp+16]
|
|
|
- sbb r9, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- sbb rax, QWORD PTR [rsp+32]
|
|
|
- sbb r9, QWORD PTR [rsp+40]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- sbb rax, QWORD PTR [rsp+48]
|
|
|
- sbb r9, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- sbb rax, QWORD PTR [rsp+64]
|
|
|
- sbb r9, QWORD PTR [rsp+72]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- sbb rax, QWORD PTR [rsp+80]
|
|
|
- sbb r9, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- sbb rax, QWORD PTR [rsp+96]
|
|
|
- sbb r9, QWORD PTR [rsp+104]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- sbb rax, QWORD PTR [rsp+112]
|
|
|
- sbb r9, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [rcx+16]
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- add rax, QWORD PTR [rdx]
|
|
|
- mov r12, 0
|
|
|
- adc r9, QWORD PTR [rdx+8]
|
|
|
- adc r10, QWORD PTR [rdx+16]
|
|
|
- adc r11, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov r10, QWORD PTR [rcx+48]
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- adc rax, QWORD PTR [rdx+32]
|
|
|
- adc r9, QWORD PTR [rdx+40]
|
|
|
- adc r10, QWORD PTR [rdx+48]
|
|
|
- adc r11, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov r10, QWORD PTR [rcx+80]
|
|
|
- mov r11, QWORD PTR [rcx+88]
|
|
|
- adc rax, QWORD PTR [rdx+64]
|
|
|
- adc r9, QWORD PTR [rdx+72]
|
|
|
- adc r10, QWORD PTR [rdx+80]
|
|
|
- adc r11, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [rcx+112]
|
|
|
- mov r11, QWORD PTR [rcx+120]
|
|
|
- adc rax, QWORD PTR [rdx+96]
|
|
|
- adc r9, QWORD PTR [rdx+104]
|
|
|
- adc r10, QWORD PTR [rdx+112]
|
|
|
- adc r11, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- sbb r12, 0
|
|
|
- sub r11, QWORD PTR [r8+120]
|
|
|
- sbb r11, r11
|
|
|
- not r11
|
|
|
- or r12, r11
|
|
|
- mov r10, QWORD PTR [r8]
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov QWORD PTR [rsp+8], r11
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov QWORD PTR [rsp+24], r11
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov QWORD PTR [rsp+40], r11
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov QWORD PTR [rsp+56], r11
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov QWORD PTR [rsp+72], r11
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- mov QWORD PTR [rsp+88], r11
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- mov QWORD PTR [rsp+104], r11
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- and r10, r12
|
|
|
- and r11, r12
|
|
|
- mov QWORD PTR [rsp+112], r10
|
|
|
- mov QWORD PTR [rsp+120], r11
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- sub rax, QWORD PTR [rsp]
|
|
|
- sbb r9, QWORD PTR [rsp+8]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- sbb rax, QWORD PTR [rsp+16]
|
|
|
- sbb r9, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- sbb rax, QWORD PTR [rsp+32]
|
|
|
- sbb r9, QWORD PTR [rsp+40]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- sbb rax, QWORD PTR [rsp+48]
|
|
|
- sbb r9, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- sbb rax, QWORD PTR [rsp+64]
|
|
|
- sbb r9, QWORD PTR [rsp+72]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- sbb rax, QWORD PTR [rsp+80]
|
|
|
- sbb r9, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- sbb rax, QWORD PTR [rsp+96]
|
|
|
- sbb r9, QWORD PTR [rsp+104]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- sbb rax, QWORD PTR [rsp+112]
|
|
|
- sbb r9, QWORD PTR [rsp+120]
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- add rsp, 128
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_tpl_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Subtract two Montgomery form numbers (r = a - b % m).
|
|
|
-; *
|
|
|
-; * r Result of subtraction.
|
|
|
-; * a Number to subtract from in Montgomery form.
|
|
|
-; * b Number to subtract in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_sub_16 PROC ; rcx = r, rdx = a, r8 = b, r9 = m (per the header's parameter order)
|
|
|
- push r12 ; r12 and r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- sub rsp, 128 ; 16-word stack buffer for the masked modulus
|
|
|
- mov rax, QWORD PTR [rdx] ; r = a - b, four words per group on one continuous borrow chain
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov r11, QWORD PTR [rdx+16]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- sub rax, QWORD PTR [r8]
|
|
|
- mov r13, 0 ; mov leaves the flags alone, so the borrow from the sub above survives
|
|
|
- sbb r10, QWORD PTR [r8+8]
|
|
|
- sbb r11, QWORD PTR [r8+16]
|
|
|
- sbb r12, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov r11, QWORD PTR [rdx+48]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- sbb rax, QWORD PTR [r8+32]
|
|
|
- sbb r10, QWORD PTR [r8+40]
|
|
|
- sbb r11, QWORD PTR [r8+48]
|
|
|
- sbb r12, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- sbb rax, QWORD PTR [r8+64]
|
|
|
- sbb r10, QWORD PTR [r8+72]
|
|
|
- sbb r11, QWORD PTR [r8+80]
|
|
|
- sbb r12, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov r11, QWORD PTR [rdx+112]
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- sbb rax, QWORD PTR [r8+96]
|
|
|
- sbb r10, QWORD PTR [r8+104]
|
|
|
- sbb r11, QWORD PTR [r8+112]
|
|
|
- sbb r12, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- sbb r13, 0 ; r13 = 0 - borrow: all ones when a - b borrowed, otherwise zero
|
|
|
- mov r11, QWORD PTR [r9] ; stack buffer = m AND r13, two words at a time
|
|
|
- mov r12, QWORD PTR [r9+8]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp], r11
|
|
|
- mov QWORD PTR [rsp+8], r12
|
|
|
- mov r11, QWORD PTR [r9+16]
|
|
|
- mov r12, QWORD PTR [r9+24]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+16], r11
|
|
|
- mov QWORD PTR [rsp+24], r12
|
|
|
- mov r11, QWORD PTR [r9+32]
|
|
|
- mov r12, QWORD PTR [r9+40]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+32], r11
|
|
|
- mov QWORD PTR [rsp+40], r12
|
|
|
- mov r11, QWORD PTR [r9+48]
|
|
|
- mov r12, QWORD PTR [r9+56]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+48], r11
|
|
|
- mov QWORD PTR [rsp+56], r12
|
|
|
- mov r11, QWORD PTR [r9+64]
|
|
|
- mov r12, QWORD PTR [r9+72]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+64], r11
|
|
|
- mov QWORD PTR [rsp+72], r12
|
|
|
- mov r11, QWORD PTR [r9+80]
|
|
|
- mov r12, QWORD PTR [r9+88]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+80], r11
|
|
|
- mov QWORD PTR [rsp+88], r12
|
|
|
- mov r11, QWORD PTR [r9+96]
|
|
|
- mov r12, QWORD PTR [r9+104]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+96], r11
|
|
|
- mov QWORD PTR [rsp+104], r12
|
|
|
- mov r11, QWORD PTR [r9+112]
|
|
|
- mov r12, QWORD PTR [r9+120]
|
|
|
- and r11, r13
|
|
|
- and r12, r13
|
|
|
- mov QWORD PTR [rsp+112], r11
|
|
|
- mov QWORD PTR [rsp+120], r12
|
|
|
- mov rax, QWORD PTR [rcx] ; r += stack buffer, so m is added back only after a borrow
|
|
|
- mov r10, QWORD PTR [rcx+8]
|
|
|
- add rax, QWORD PTR [rsp]
|
|
|
- adc r10, QWORD PTR [rsp+8]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r10, QWORD PTR [rcx+24]
|
|
|
- adc rax, QWORD PTR [rsp+16]
|
|
|
- adc r10, QWORD PTR [rsp+24]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [rcx+40]
|
|
|
- adc rax, QWORD PTR [rsp+32]
|
|
|
- adc r10, QWORD PTR [rsp+40]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r10, QWORD PTR [rcx+56]
|
|
|
- adc rax, QWORD PTR [rsp+48]
|
|
|
- adc r10, QWORD PTR [rsp+56]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r10, QWORD PTR [rcx+72]
|
|
|
- adc rax, QWORD PTR [rsp+64]
|
|
|
- adc r10, QWORD PTR [rsp+72]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [rcx+88]
|
|
|
- adc rax, QWORD PTR [rsp+80]
|
|
|
- adc r10, QWORD PTR [rsp+88]
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r10, QWORD PTR [rcx+104]
|
|
|
- adc rax, QWORD PTR [rsp+96]
|
|
|
- adc r10, QWORD PTR [rsp+104]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r10, QWORD PTR [rcx+120]
|
|
|
- adc rax, QWORD PTR [rsp+112]
|
|
|
- adc r10, QWORD PTR [rsp+120] ; the carry out of this last word is not used
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- add rsp, 128 ; release the stack buffer
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_sub_16 ENDP
|
|
|
-_text ENDS
|
|
|
-; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
|
|
|
-; *
|
|
|
-; * r Result of division by 2.
|
|
|
-; * a Number to divide.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_div2_16 PROC ; rcx = r, rdx = a, r8 = m (per the header's parameter order)
|
|
|
- push r12 ; r12 and r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- sub rsp, 128 ; 16-word stack buffer for the intermediate sum
|
|
|
- mov r13, QWORD PTR [rdx] ; r13 = a[0]
|
|
|
- xor r12, r12 ; r12 will receive the carry out of the addition below
|
|
|
- mov rax, r13 ; keep a[0] for the first add into the buffer
|
|
|
- and r13, 1 ; isolate the low bit of a
|
|
|
- neg r13 ; r13 = all ones if a is odd, zero if a is even
|
|
|
- mov r10, QWORD PTR [r8] ; stack buffer = m AND r13, one word at a time
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp], r10
|
|
|
- mov r10, QWORD PTR [r8+8]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+8], r10
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+16], r10
|
|
|
- mov r10, QWORD PTR [r8+24]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+24], r10
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+32], r10
|
|
|
- mov r10, QWORD PTR [r8+40]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+40], r10
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+48], r10
|
|
|
- mov r10, QWORD PTR [r8+56]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+56], r10
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+64], r10
|
|
|
- mov r10, QWORD PTR [r8+72]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+72], r10
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+80], r10
|
|
|
- mov r10, QWORD PTR [r8+88]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+88], r10
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+96], r10
|
|
|
- mov r10, QWORD PTR [r8+104]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+104], r10
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+112], r10
|
|
|
- mov r10, QWORD PTR [r8+120]
|
|
|
- and r10, r13
|
|
|
- mov QWORD PTR [rsp+120], r10
|
|
|
- add QWORD PTR [rsp], rax ; stack buffer += a, so an odd a becomes the even value a + m
|
|
|
- mov rax, QWORD PTR [rdx+8]
|
|
|
- adc QWORD PTR [rsp+8], rax
|
|
|
- mov rax, QWORD PTR [rdx+16]
|
|
|
- adc QWORD PTR [rsp+16], rax
|
|
|
- mov rax, QWORD PTR [rdx+24]
|
|
|
- adc QWORD PTR [rsp+24], rax
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- adc QWORD PTR [rsp+32], rax
|
|
|
- mov rax, QWORD PTR [rdx+40]
|
|
|
- adc QWORD PTR [rsp+40], rax
|
|
|
- mov rax, QWORD PTR [rdx+48]
|
|
|
- adc QWORD PTR [rsp+48], rax
|
|
|
- mov rax, QWORD PTR [rdx+56]
|
|
|
- adc QWORD PTR [rsp+56], rax
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- adc QWORD PTR [rsp+64], rax
|
|
|
- mov rax, QWORD PTR [rdx+72]
|
|
|
- adc QWORD PTR [rsp+72], rax
|
|
|
- mov rax, QWORD PTR [rdx+80]
|
|
|
- adc QWORD PTR [rsp+80], rax
|
|
|
- mov rax, QWORD PTR [rdx+88]
|
|
|
- adc QWORD PTR [rsp+88], rax
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- adc QWORD PTR [rsp+96], rax
|
|
|
- mov rax, QWORD PTR [rdx+104]
|
|
|
- adc QWORD PTR [rsp+104], rax
|
|
|
- mov rax, QWORD PTR [rdx+112]
|
|
|
- adc QWORD PTR [rsp+112], rax
|
|
|
- mov rax, QWORD PTR [rdx+120]
|
|
|
- adc QWORD PTR [rsp+120], rax
|
|
|
- adc r12, 0 ; r12 = carry out of the 16-word sum
|
|
|
- mov rax, QWORD PTR [rsp] ; shift the sum right by one bit into r, alternating rax and r9
|
|
|
- mov r9, QWORD PTR [rsp+8]
|
|
|
- shrd rax, r9, 1 ; low word takes its new top bit from the next word up
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov rax, QWORD PTR [rsp+16]
|
|
|
- shrd r9, rax, 1
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov r9, QWORD PTR [rsp+24]
|
|
|
- shrd rax, r9, 1
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov rax, QWORD PTR [rsp+32]
|
|
|
- shrd r9, rax, 1
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov r9, QWORD PTR [rsp+40]
|
|
|
- shrd rax, r9, 1
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov rax, QWORD PTR [rsp+48]
|
|
|
- shrd r9, rax, 1
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov r9, QWORD PTR [rsp+56]
|
|
|
- shrd rax, r9, 1
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov rax, QWORD PTR [rsp+64]
|
|
|
- shrd r9, rax, 1
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- mov r9, QWORD PTR [rsp+72]
|
|
|
- shrd rax, r9, 1
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov rax, QWORD PTR [rsp+80]
|
|
|
- shrd r9, rax, 1
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov r9, QWORD PTR [rsp+88]
|
|
|
- shrd rax, r9, 1
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov rax, QWORD PTR [rsp+96]
|
|
|
- shrd r9, rax, 1
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- mov r9, QWORD PTR [rsp+104]
|
|
|
- shrd rax, r9, 1
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov rax, QWORD PTR [rsp+112]
|
|
|
- shrd r9, rax, 1
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov r9, QWORD PTR [rsp+120]
|
|
|
- shrd rax, r9, 1
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- shrd r9, r12, 1 ; top word takes the saved carry as its new high bit
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- add rsp, 128 ; release the stack buffer
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_div2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Reduce the number back to 1024 bits using Montgomery reduction.
|
|
|
-; *
|
|
|
-; * a A single precision number to reduce in place.
|
|
|
-; * m The single precision number representing the modulus.
|
|
|
-; * mp The digit representing the negative inverse of m mod 2^n.
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_reduce_avx2_16 PROC ; rcx = a, rdx = m, r8 = mp (per the header's parameter order)
|
|
|
- push r12 ; every register pushed here is overwritten below and popped in reverse order before ret
|
|
|
- push r13
|
|
|
- push r14
|
|
|
- push r15
|
|
|
- push rdi
|
|
|
- push rsi
|
|
|
- push rbx
|
|
|
- push rbp
|
|
|
- mov r9, rcx ; r9 = a (rcx is reused as the mulx high-half destination)
|
|
|
- mov r10, rdx ; r10 = m (rdx is the implicit mulx multiplicand)
|
|
|
- xor rbp, rbp ; rbp carries the overflow word from one pass to the next
|
|
|
- ; i = 16
|
|
|
- mov r11, 16 ; r11 = loop counter, decremented by 2 per loop pass
|
|
|
- mov r14, QWORD PTR [r9] ; a[0..3] are kept in r14, r15, rdi, rsi as a sliding window
|
|
|
- mov r15, QWORD PTR [r9+8]
|
|
|
- mov rdi, QWORD PTR [r9+16]
|
|
|
- mov rsi, QWORD PTR [r9+24]
|
|
|
- add r9, 64 ; bias the pointer: a[i+4]..a[i+16] are addressed as [r9-32]..[r9+64]
|
|
|
- xor rbp, rbp ; rbp is already zero here; this repeats the earlier clear
|
|
|
-L_1024_mont_reduce_avx2_16_loop:
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14
|
|
|
- mov r12, r14
|
|
|
- imul rdx, r8 ; rdx = mu (low 64 bits of a[i] * mp)
|
|
|
- xor rbx, rbx ; rbx = 0, and clears CF and OF to start the adcx/adox chains
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10] ; rcx:rax = m[0] * mu; mulx does not touch the flags
|
|
|
- mov r14, r15 ; slide the register window down by one word
|
|
|
- adcx r12, rax ; low halves accumulate on the CF chain
|
|
|
- adox r14, rcx ; high halves accumulate on the OF chain
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-32]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-24]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-24], r13 ; from here on r12/r13 alternate and each finished word is stored back to a
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9+-8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-16], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-8], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+8]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+16]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+8], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+24]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+16], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9+32]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+24], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+40]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+32], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+48]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+40], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+56]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+48], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9+64]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+56], r13
|
|
|
- adcx r12, rbp ; fold the carry word left by the previous pass into a[i+16]
|
|
|
- mov rbp, rbx ; rbp = 0 (rbx is zero), then collect both pending flags
|
|
|
- mov QWORD PTR [r9+64], r12
|
|
|
- adox rbp, rbx ; rbp += OF
|
|
|
- adcx rbp, rbx ; rbp += CF; rbp is the carry word for the next pass
|
|
|
- ; mu = a[i] * mp
|
|
|
- mov rdx, r14 ; second unrolled pass: same steps with every a offset one word (8 bytes) higher
|
|
|
- mov r12, r14
|
|
|
- imul rdx, r8
|
|
|
- xor rbx, rbx
|
|
|
- ; a[i+0] += m[0] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10]
|
|
|
- mov r14, r15
|
|
|
- adcx r12, rax
|
|
|
- adox r14, rcx
|
|
|
- ; a[i+1] += m[1] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+8]
|
|
|
- mov r15, rdi
|
|
|
- adcx r14, rax
|
|
|
- adox r15, rcx
|
|
|
- ; a[i+2] += m[2] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+16]
|
|
|
- mov rdi, rsi
|
|
|
- adcx r15, rax
|
|
|
- adox rdi, rcx
|
|
|
- ; a[i+3] += m[3] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+24]
|
|
|
- mov rsi, QWORD PTR [r9+-24]
|
|
|
- adcx rdi, rax
|
|
|
- adox rsi, rcx
|
|
|
- ; a[i+4] += m[4] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+32]
|
|
|
- mov r13, QWORD PTR [r9+-16]
|
|
|
- adcx rsi, rax
|
|
|
- adox r13, rcx
|
|
|
- ; a[i+5] += m[5] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+40]
|
|
|
- mov r12, QWORD PTR [r9+-8]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+-16], r13
|
|
|
- ; a[i+6] += m[6] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+48]
|
|
|
- mov r13, QWORD PTR [r9]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+-8], r12
|
|
|
- ; a[i+7] += m[7] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+56]
|
|
|
- mov r12, QWORD PTR [r9+8]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9], r13
|
|
|
- ; a[i+8] += m[8] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+64]
|
|
|
- mov r13, QWORD PTR [r9+16]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+8], r12
|
|
|
- ; a[i+9] += m[9] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+72]
|
|
|
- mov r12, QWORD PTR [r9+24]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+16], r13
|
|
|
- ; a[i+10] += m[10] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+80]
|
|
|
- mov r13, QWORD PTR [r9+32]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+24], r12
|
|
|
- ; a[i+11] += m[11] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+88]
|
|
|
- mov r12, QWORD PTR [r9+40]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+32], r13
|
|
|
- ; a[i+12] += m[12] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+96]
|
|
|
- mov r13, QWORD PTR [r9+48]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+40], r12
|
|
|
- ; a[i+13] += m[13] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+104]
|
|
|
- mov r12, QWORD PTR [r9+56]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+48], r13
|
|
|
- ; a[i+14] += m[14] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+112]
|
|
|
- mov r13, QWORD PTR [r9+64]
|
|
|
- adcx r12, rax
|
|
|
- adox r13, rcx
|
|
|
- mov QWORD PTR [r9+56], r12
|
|
|
- ; a[i+15] += m[15] * mu
|
|
|
- mulx rcx, rax, QWORD PTR [r10+120]
|
|
|
- mov r12, QWORD PTR [r9+72]
|
|
|
- adcx r13, rax
|
|
|
- adox r12, rcx
|
|
|
- mov QWORD PTR [r9+64], r13
|
|
|
- adcx r12, rbp
|
|
|
- mov rbp, rbx
|
|
|
- mov QWORD PTR [r9+72], r12
|
|
|
- adox rbp, rbx
|
|
|
- adcx rbp, rbx
|
|
|
- ; a += 2
|
|
|
- add r9, 16
|
|
|
- ; i -= 2
|
|
|
- sub r11, 2
|
|
|
- jnz L_1024_mont_reduce_avx2_16_loop
|
|
|
- sub r9, 64 ; remove the +64 bias; r9 is now 128 bytes past the original a
|
|
|
- sub r12, QWORD PTR [r10+120] ; r12 is the last word stored (top word of the result); compare it with m[15]
|
|
|
- mov r8, r9 ; r8 = upper 16 words of a, the source of the result (mp is no longer needed)
|
|
|
- sbb r12, r12 ; r12 = all ones if top word < m[15], else zero
|
|
|
- neg rbp ; rbp = 0 - carry word from the final pass
|
|
|
- not r12 ; r12 = all ones if top word >= m[15]
|
|
|
- or rbp, r12 ; rbp = mask deciding whether m is subtracted below
|
|
|
- sub r9, 128 ; r9 = original a, the destination of the result
|
|
|
- mov rcx, QWORD PTR [r10]
|
|
|
- mov rdx, r14 ; result words 0..3 are still held in r14, r15, rdi, rsi
|
|
|
- pext rcx, rcx, rbp ; with an all-ones mask pext returns m[k], with a zero mask it returns 0; flags are untouched
|
|
|
- sub rdx, rcx ; start of the borrow chain; stores and loads are interleaved between the sbb steps
|
|
|
- mov rcx, QWORD PTR [r10+8]
|
|
|
- mov rax, r15
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+16]
|
|
|
- mov rcx, rdi
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+8], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+24]
|
|
|
- mov rdx, rsi
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+16], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+32]
|
|
|
- mov rax, QWORD PTR [r8+32] ; remaining result words are read from the upper half of a
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+24], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+40]
|
|
|
- mov rcx, QWORD PTR [r8+40]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+32], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+48]
|
|
|
- mov rdx, QWORD PTR [r8+48]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+40], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+56]
|
|
|
- mov rax, QWORD PTR [r8+56]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+48], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+64]
|
|
|
- mov rcx, QWORD PTR [r8+64]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+56], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+72]
|
|
|
- mov rdx, QWORD PTR [r8+72]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+64], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+80]
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+72], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+88]
|
|
|
- mov rcx, QWORD PTR [r8+88]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+80], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+96]
|
|
|
- mov rdx, QWORD PTR [r8+96]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+88], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov rcx, QWORD PTR [r10+104]
|
|
|
- mov rax, QWORD PTR [r8+104]
|
|
|
- pext rcx, rcx, rbp
|
|
|
- mov QWORD PTR [r9+96], rdx
|
|
|
- sbb rax, rcx
|
|
|
- mov rdx, QWORD PTR [r10+112]
|
|
|
- mov rcx, QWORD PTR [r8+112]
|
|
|
- pext rdx, rdx, rbp
|
|
|
- mov QWORD PTR [r9+104], rax
|
|
|
- sbb rcx, rdx
|
|
|
- mov rax, QWORD PTR [r10+120]
|
|
|
- mov rdx, QWORD PTR [r8+120]
|
|
|
- pext rax, rax, rbp
|
|
|
- mov QWORD PTR [r9+112], rcx
|
|
|
- sbb rdx, rax
|
|
|
- mov QWORD PTR [r9+120], rdx
|
|
|
- pop rbp
|
|
|
- pop rbx
|
|
|
- pop rsi
|
|
|
- pop rdi
|
|
|
- pop r15
|
|
|
- pop r14
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_reduce_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Add two Montgomery form numbers (r = a + b % m).
|
|
|
-; *
|
|
|
-; * r Result of addition.
|
|
|
-; * a First number to add in Montgomery form.
|
|
|
-; * b Second number to add in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_add_avx2_16 PROC ; rcx = r, rdx = a, r8 = b, r9 = m (per the header's parameter order)
|
|
|
- push r12 ; r12 and r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx] ; r = a + b, four words per group on one continuous carry chain
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov r11, QWORD PTR [rdx+16]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- add rax, QWORD PTR [r8]
|
|
|
- mov r13, 0 ; mov leaves the flags alone, so the carry from the add above survives
|
|
|
- adc r10, QWORD PTR [r8+8]
|
|
|
- adc r11, QWORD PTR [r8+16]
|
|
|
- adc r12, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov r11, QWORD PTR [rdx+48]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- adc rax, QWORD PTR [r8+32]
|
|
|
- adc r10, QWORD PTR [r8+40]
|
|
|
- adc r11, QWORD PTR [r8+48]
|
|
|
- adc r12, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- adc rax, QWORD PTR [r8+64]
|
|
|
- adc r10, QWORD PTR [r8+72]
|
|
|
- adc r11, QWORD PTR [r8+80]
|
|
|
- adc r12, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov r11, QWORD PTR [rdx+112]
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- adc rax, QWORD PTR [r8+96]
|
|
|
- adc r10, QWORD PTR [r8+104]
|
|
|
- adc r11, QWORD PTR [r8+112]
|
|
|
- adc r12, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- sbb r13, 0 ; r13 = 0 - carry: all ones if the 16-word sum carried out, else zero
|
|
|
- sub r12, QWORD PTR [r9+120] ; r12 still holds the top word of the sum; only this word is compared with m[15]
|
|
|
- sbb r12, r12 ; r12 = all ones if top word < m[15], else zero
|
|
|
- not r12 ; r12 = all ones if top word >= m[15]
|
|
|
- or r13, r12 ; r13 = mask: subtract m if the sum carried or its top word >= m[15]
|
|
|
- mov r11, QWORD PTR [r9] ; r -= (m masked by r13), two words per group on one borrow chain
|
|
|
- mov r12, QWORD PTR [r9+8]
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r10, QWORD PTR [rcx+8]
|
|
|
- pext r11, r11, r13 ; r13 is all ones or zero, so pext yields m[k] or 0; pext does not touch the flags
|
|
|
- pext r12, r12, r13
|
|
|
- sub rax, r11
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov r11, QWORD PTR [r9+16]
|
|
|
- mov r12, QWORD PTR [r9+24]
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r10, QWORD PTR [rcx+24]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- sbb rax, r11
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov r11, QWORD PTR [r9+32]
|
|
|
- mov r12, QWORD PTR [r9+40]
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [rcx+40]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- sbb rax, r11
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov r11, QWORD PTR [r9+48]
|
|
|
- mov r12, QWORD PTR [r9+56]
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r10, QWORD PTR [rcx+56]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- sbb rax, r11
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- mov r11, QWORD PTR [r9+64]
|
|
|
- mov r12, QWORD PTR [r9+72]
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r10, QWORD PTR [rcx+72]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- sbb rax, r11
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov r11, QWORD PTR [r9+80]
|
|
|
- mov r12, QWORD PTR [r9+88]
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [rcx+88]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- sbb rax, r11
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- mov r11, QWORD PTR [r9+96]
|
|
|
- mov r12, QWORD PTR [r9+104]
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r10, QWORD PTR [rcx+104]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- sbb rax, r11
|
|
|
- sbb r10, r12
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov r11, QWORD PTR [r9+112]
|
|
|
- mov r12, QWORD PTR [r9+120]
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r10, QWORD PTR [rcx+120]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- sbb rax, r11
|
|
|
- sbb r10, r12 ; the borrow out of this last word is not used
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_add_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Double a Montgomery form number (r = a + a % m).
|
|
|
-; *
|
|
|
-; * r Result of addition.
|
|
|
-; * a Number to double in Montgomery form.
|
|
|
-; * m Modulus (prime).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_dbl_avx2_16 PROC ; rcx = r, rdx = a, r8 = m (per the header's parameter order)
|
|
|
- push r12 ; r12 is used as scratch and restored before ret
|
|
|
- mov rax, QWORD PTR [rdx] ; r = a + a, four words per group on one continuous carry chain
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- add rax, QWORD PTR [rdx]
|
|
|
- mov r12, 0 ; mov leaves the flags alone, so the carry from the add above survives
|
|
|
- adc r9, QWORD PTR [rdx+8]
|
|
|
- adc r10, QWORD PTR [rdx+16]
|
|
|
- adc r11, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- adc rax, QWORD PTR [rdx+32]
|
|
|
- adc r9, QWORD PTR [rdx+40]
|
|
|
- adc r10, QWORD PTR [rdx+48]
|
|
|
- adc r11, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- adc rax, QWORD PTR [rdx+64]
|
|
|
- adc r9, QWORD PTR [rdx+72]
|
|
|
- adc r10, QWORD PTR [rdx+80]
|
|
|
- adc r11, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- adc rax, QWORD PTR [rdx+96]
|
|
|
- adc r9, QWORD PTR [rdx+104]
|
|
|
- adc r10, QWORD PTR [rdx+112]
|
|
|
- adc r11, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- sbb r12, 0 ; r12 = 0 - carry: all ones if the 16-word sum carried out, else zero
|
|
|
- sub r11, QWORD PTR [r8+120] ; r11 still holds the top word of the sum; only this word is compared with m[15]
|
|
|
- sbb r11, r11 ; r11 = all ones if top word < m[15], else zero
|
|
|
- not r11 ; r11 = all ones if top word >= m[15]
|
|
|
- or r12, r11 ; r12 = mask: subtract m if the sum carried or its top word >= m[15]
|
|
|
- mov r10, QWORD PTR [r8] ; r -= (m masked by r12), two words per group on one borrow chain
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- pext r10, r10, r12 ; r12 is all ones or zero, so pext yields m[k] or 0; pext does not touch the flags
|
|
|
- pext r11, r11, r12
|
|
|
- sub rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11 ; the borrow out of this last word is not used
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_dbl_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Triple a Montgomery form number (r = a + a + a % m).
|
|
|
-; * Done as two add passes (a + a, then + a again), each followed by a masked subtraction of m.
|
|
|
-; * r Result of tripling (16 limbs of 64 bits, addressed through rcx).
|
|
|
-; * a Number to triple in Montgomery form (addressed through rdx).
|
|
|
-; * m Modulus (prime) (addressed through r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_tpl_avx2_16 PROC
|
|
|
- push r12 ; r12 holds the subtract mask below; restored before ret
|
|
|
- mov rax, QWORD PTR [rdx] ; pass 1: r = a + a, four limbs per group
|
|
|
- mov r9, QWORD PTR [rdx+8]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- add rax, QWORD PTR [rdx]
|
|
|
- mov r12, 0 ; mov leaves CF from the add intact for the adc chain
|
|
|
- adc r9, QWORD PTR [rdx+8]
|
|
|
- adc r10, QWORD PTR [rdx+16]
|
|
|
- adc r11, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx], rax ; stores are plain mov, so CF carries into the next group
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r9, QWORD PTR [rdx+40]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- adc rax, QWORD PTR [rdx+32]
|
|
|
- adc r9, QWORD PTR [rdx+40]
|
|
|
- adc r10, QWORD PTR [rdx+48]
|
|
|
- adc r11, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r9, QWORD PTR [rdx+72]
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- adc rax, QWORD PTR [rdx+64]
|
|
|
- adc r9, QWORD PTR [rdx+72]
|
|
|
- adc r10, QWORD PTR [rdx+80]
|
|
|
- adc r11, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r9, QWORD PTR [rdx+104]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- adc rax, QWORD PTR [rdx+96]
|
|
|
- adc r9, QWORD PTR [rdx+104]
|
|
|
- adc r10, QWORD PTR [rdx+112]
|
|
|
- adc r11, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11 ; r11 still holds the top limb of the sum
|
|
|
- sbb r12, 0 ; r12 = 0 - CF: all ones if the add carried out of the top limb
|
|
|
- sub r11, QWORD PTR [r8+120] ; compare top limb of the sum with top limb of m
|
|
|
- sbb r11, r11 ; r11 = all ones if top limb of sum < top limb of m, else 0
|
|
|
- not r11 ; r11 = all ones if top limb of sum >= top limb of m
|
|
|
- or r12, r11 ; r12 = mask: subtract m on carry out or when the top limb is not below m's
|
|
|
- mov r10, QWORD PTR [r8] ; masked subtract: r = r - (m under mask r12), two limbs per group
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- pext r10, r10, r12 ; all-ones mask keeps the limb of m, zero mask gives 0
|
|
|
- pext r11, r11, r12
|
|
|
- sub rax, r10 ; starts the borrow chain; later groups use sbb only
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- mov rax, QWORD PTR [rcx] ; pass 2: r = r + a. NOTE(review): a is re-read after r was written, so r == a would not yield 3a - confirm callers never alias them
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- mov r10, QWORD PTR [rcx+16]
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- add rax, QWORD PTR [rdx]
|
|
|
- mov r12, 0 ; reset the mask without disturbing CF from the add
|
|
|
- adc r9, QWORD PTR [rdx+8]
|
|
|
- adc r10, QWORD PTR [rdx+16]
|
|
|
- adc r11, QWORD PTR [rdx+24]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- mov r10, QWORD PTR [rcx+48]
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- adc rax, QWORD PTR [rdx+32]
|
|
|
- adc r9, QWORD PTR [rdx+40]
|
|
|
- adc r10, QWORD PTR [rdx+48]
|
|
|
- adc r11, QWORD PTR [rdx+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- mov r10, QWORD PTR [rcx+80]
|
|
|
- mov r11, QWORD PTR [rcx+88]
|
|
|
- adc rax, QWORD PTR [rdx+64]
|
|
|
- adc r9, QWORD PTR [rdx+72]
|
|
|
- adc r10, QWORD PTR [rdx+80]
|
|
|
- adc r11, QWORD PTR [rdx+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- mov r10, QWORD PTR [rcx+112]
|
|
|
- mov r11, QWORD PTR [rcx+120]
|
|
|
- adc rax, QWORD PTR [rdx+96]
|
|
|
- adc r9, QWORD PTR [rdx+104]
|
|
|
- adc r10, QWORD PTR [rdx+112]
|
|
|
- adc r11, QWORD PTR [rdx+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- sbb r12, 0 ; same mask construction as after pass 1
|
|
|
- sub r11, QWORD PTR [r8+120]
|
|
|
- sbb r11, r11
|
|
|
- not r11
|
|
|
- or r12, r11
|
|
|
- mov r10, QWORD PTR [r8] ; second masked subtract of m
|
|
|
- mov r11, QWORD PTR [r8+8]
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r9, QWORD PTR [rcx+8]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sub rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r9
|
|
|
- mov r10, QWORD PTR [r8+16]
|
|
|
- mov r11, QWORD PTR [r8+24]
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r9, QWORD PTR [rcx+24]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r9
|
|
|
- mov r10, QWORD PTR [r8+32]
|
|
|
- mov r11, QWORD PTR [r8+40]
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r9, QWORD PTR [rcx+40]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r9
|
|
|
- mov r10, QWORD PTR [r8+48]
|
|
|
- mov r11, QWORD PTR [r8+56]
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r9, QWORD PTR [rcx+56]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r9
|
|
|
- mov r10, QWORD PTR [r8+64]
|
|
|
- mov r11, QWORD PTR [r8+72]
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r9, QWORD PTR [rcx+72]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r9
|
|
|
- mov r10, QWORD PTR [r8+80]
|
|
|
- mov r11, QWORD PTR [r8+88]
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r9, QWORD PTR [rcx+88]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r9
|
|
|
- mov r10, QWORD PTR [r8+96]
|
|
|
- mov r11, QWORD PTR [r8+104]
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r9, QWORD PTR [rcx+104]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r9
|
|
|
- mov r10, QWORD PTR [r8+112]
|
|
|
- mov r11, QWORD PTR [r8+120]
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r9, QWORD PTR [rcx+120]
|
|
|
- pext r10, r10, r12
|
|
|
- pext r11, r11, r12
|
|
|
- sbb rax, r10
|
|
|
- sbb r9, r11
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r9
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_tpl_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Subtract two Montgomery form numbers (r = a - b % m).
|
|
|
-; * m is added back (masked add) when the subtraction borrows out of the top limb.
|
|
|
-; * r Result of subtraction (16 limbs of 64 bits, addressed through rcx).
|
|
|
-; * a Number to subtract from, in Montgomery form (addressed through rdx).
|
|
|
-; * b Number to subtract, in Montgomery form (addressed through r8).
|
|
|
-; * m Modulus (prime) (addressed through r9).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_sub_avx2_16 PROC
|
|
|
- push r12 ; r12 and r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov rax, QWORD PTR [rdx] ; r = a - b, four limbs per group
|
|
|
- mov r10, QWORD PTR [rdx+8]
|
|
|
- mov r11, QWORD PTR [rdx+16]
|
|
|
- mov r12, QWORD PTR [rdx+24]
|
|
|
- sub rax, QWORD PTR [r8]
|
|
|
- mov r13, 0 ; mov leaves CF (the borrow) intact for the sbb chain
|
|
|
- sbb r10, QWORD PTR [r8+8]
|
|
|
- sbb r11, QWORD PTR [r8+16]
|
|
|
- sbb r12, QWORD PTR [r8+24]
|
|
|
- mov QWORD PTR [rcx], rax ; stores are plain mov, so the borrow carries into the next group
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov QWORD PTR [rcx+16], r11
|
|
|
- mov QWORD PTR [rcx+24], r12
|
|
|
- mov rax, QWORD PTR [rdx+32]
|
|
|
- mov r10, QWORD PTR [rdx+40]
|
|
|
- mov r11, QWORD PTR [rdx+48]
|
|
|
- mov r12, QWORD PTR [rdx+56]
|
|
|
- sbb rax, QWORD PTR [r8+32]
|
|
|
- sbb r10, QWORD PTR [r8+40]
|
|
|
- sbb r11, QWORD PTR [r8+48]
|
|
|
- sbb r12, QWORD PTR [r8+56]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov QWORD PTR [rcx+48], r11
|
|
|
- mov QWORD PTR [rcx+56], r12
|
|
|
- mov rax, QWORD PTR [rdx+64]
|
|
|
- mov r10, QWORD PTR [rdx+72]
|
|
|
- mov r11, QWORD PTR [rdx+80]
|
|
|
- mov r12, QWORD PTR [rdx+88]
|
|
|
- sbb rax, QWORD PTR [r8+64]
|
|
|
- sbb r10, QWORD PTR [r8+72]
|
|
|
- sbb r11, QWORD PTR [r8+80]
|
|
|
- sbb r12, QWORD PTR [r8+88]
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov QWORD PTR [rcx+80], r11
|
|
|
- mov QWORD PTR [rcx+88], r12
|
|
|
- mov rax, QWORD PTR [rdx+96]
|
|
|
- mov r10, QWORD PTR [rdx+104]
|
|
|
- mov r11, QWORD PTR [rdx+112]
|
|
|
- mov r12, QWORD PTR [rdx+120]
|
|
|
- sbb rax, QWORD PTR [r8+96]
|
|
|
- sbb r10, QWORD PTR [r8+104]
|
|
|
- sbb r11, QWORD PTR [r8+112]
|
|
|
- sbb r12, QWORD PTR [r8+120]
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov QWORD PTR [rcx+112], r11
|
|
|
- mov QWORD PTR [rcx+120], r12
|
|
|
- sbb r13, 0 ; r13 = 0 - CF: all ones if a - b borrowed, else 0
|
|
|
- mov r11, QWORD PTR [r9] ; masked add: r = r + (m under mask r13), two limbs per group
|
|
|
- mov r12, QWORD PTR [r9+8]
|
|
|
- mov rax, QWORD PTR [rcx]
|
|
|
- mov r10, QWORD PTR [rcx+8]
|
|
|
- pext r11, r11, r13 ; all-ones mask keeps the limb of m, zero mask gives 0
|
|
|
- pext r12, r12, r13
|
|
|
- add rax, r11 ; starts the carry chain; later groups use adc only
|
|
|
- adc r10, r12
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov r11, QWORD PTR [r9+16]
|
|
|
- mov r12, QWORD PTR [r9+24]
|
|
|
- mov rax, QWORD PTR [rcx+16]
|
|
|
- mov r10, QWORD PTR [rcx+24]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- adc rax, r11
|
|
|
- adc r10, r12
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov r11, QWORD PTR [r9+32]
|
|
|
- mov r12, QWORD PTR [r9+40]
|
|
|
- mov rax, QWORD PTR [rcx+32]
|
|
|
- mov r10, QWORD PTR [rcx+40]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- adc rax, r11
|
|
|
- adc r10, r12
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov r11, QWORD PTR [r9+48]
|
|
|
- mov r12, QWORD PTR [r9+56]
|
|
|
- mov rax, QWORD PTR [rcx+48]
|
|
|
- mov r10, QWORD PTR [rcx+56]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- adc rax, r11
|
|
|
- adc r10, r12
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- mov r11, QWORD PTR [r9+64]
|
|
|
- mov r12, QWORD PTR [r9+72]
|
|
|
- mov rax, QWORD PTR [rcx+64]
|
|
|
- mov r10, QWORD PTR [rcx+72]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- adc rax, r11
|
|
|
- adc r10, r12
|
|
|
- mov QWORD PTR [rcx+64], rax
|
|
|
- mov QWORD PTR [rcx+72], r10
|
|
|
- mov r11, QWORD PTR [r9+80]
|
|
|
- mov r12, QWORD PTR [r9+88]
|
|
|
- mov rax, QWORD PTR [rcx+80]
|
|
|
- mov r10, QWORD PTR [rcx+88]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- adc rax, r11
|
|
|
- adc r10, r12
|
|
|
- mov QWORD PTR [rcx+80], rax
|
|
|
- mov QWORD PTR [rcx+88], r10
|
|
|
- mov r11, QWORD PTR [r9+96]
|
|
|
- mov r12, QWORD PTR [r9+104]
|
|
|
- mov rax, QWORD PTR [rcx+96]
|
|
|
- mov r10, QWORD PTR [rcx+104]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- adc rax, r11
|
|
|
- adc r10, r12
|
|
|
- mov QWORD PTR [rcx+96], rax
|
|
|
- mov QWORD PTR [rcx+104], r10
|
|
|
- mov r11, QWORD PTR [r9+112]
|
|
|
- mov r12, QWORD PTR [r9+120]
|
|
|
- mov rax, QWORD PTR [rcx+112]
|
|
|
- mov r10, QWORD PTR [rcx+120]
|
|
|
- pext r11, r11, r13
|
|
|
- pext r12, r12, r13
|
|
|
- adc rax, r11
|
|
|
- adc r10, r12 ; the final carry out is discarded
|
|
|
- mov QWORD PTR [rcx+112], rax
|
|
|
- mov QWORD PTR [rcx+120], r10
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_sub_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-IFDEF HAVE_INTEL_AVX2
|
|
|
-; /* Divide the number by 2 mod the modulus (prime). (r = a / 2 % m)
|
|
|
-; * If a is odd, m is added first (masked add); the sum, including its carry, is then shifted right by one bit.
|
|
|
-; * r Result of division by 2 (16 limbs of 64 bits, addressed through rcx).
|
|
|
-; * a Number to divide (addressed through rdx).
|
|
|
-; * m Modulus (prime) (addressed through r8).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_mont_div2_avx2_16 PROC
|
|
|
- push r12 ; r12 and r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov r13, QWORD PTR [rdx]
|
|
|
- xor r12, r12 ; r12 collects the carry out of the add; cleared before the flag chain starts
|
|
|
- mov r10, r13 ; keep a[0] for the first add, so [rdx] is not loaded a second time
|
|
|
- and r13, 1
|
|
|
- neg r13 ; r13 = all ones if a is odd, else 0
|
|
|
- mov rax, QWORD PTR [r8] ; masked add: r = a + (m under mask r13), two limbs per group
|
|
|
- mov r9, QWORD PTR [r8+8]
|
|
|
- mov r11, QWORD PTR [rdx+8]
|
|
|
- pext rax, rax, r13 ; all-ones mask keeps the limb of m, zero mask gives 0
|
|
|
- pext r9, r9, r13
|
|
|
- add r10, rax ; starts the carry chain; later groups use adc only
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov rax, QWORD PTR [r8+16]
|
|
|
- mov r9, QWORD PTR [r8+24]
|
|
|
- mov r10, QWORD PTR [rdx+16]
|
|
|
- mov r11, QWORD PTR [rdx+24]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov rax, QWORD PTR [r8+32]
|
|
|
- mov r9, QWORD PTR [r8+40]
|
|
|
- mov r10, QWORD PTR [rdx+32]
|
|
|
- mov r11, QWORD PTR [rdx+40]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- mov rax, QWORD PTR [r8+48]
|
|
|
- mov r9, QWORD PTR [r8+56]
|
|
|
- mov r10, QWORD PTR [rdx+48]
|
|
|
- mov r11, QWORD PTR [rdx+56]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov rax, QWORD PTR [r8+64]
|
|
|
- mov r9, QWORD PTR [r8+72]
|
|
|
- mov r10, QWORD PTR [rdx+64]
|
|
|
- mov r11, QWORD PTR [rdx+72]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+64], r10
|
|
|
- mov QWORD PTR [rcx+72], r11
|
|
|
- mov rax, QWORD PTR [r8+80]
|
|
|
- mov r9, QWORD PTR [r8+88]
|
|
|
- mov r10, QWORD PTR [rdx+80]
|
|
|
- mov r11, QWORD PTR [rdx+88]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov rax, QWORD PTR [r8+96]
|
|
|
- mov r9, QWORD PTR [r8+104]
|
|
|
- mov r10, QWORD PTR [rdx+96]
|
|
|
- mov r11, QWORD PTR [rdx+104]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- mov rax, QWORD PTR [r8+112]
|
|
|
- mov r9, QWORD PTR [r8+120]
|
|
|
- mov r10, QWORD PTR [rdx+112]
|
|
|
- mov r11, QWORD PTR [rdx+120]
|
|
|
- pext rax, rax, r13
|
|
|
- pext r9, r9, r13
|
|
|
- adc r10, rax
|
|
|
- adc r11, r9
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- adc r12, 0 ; r12 = carry out of the 16-limb add (0 or 1)
|
|
|
- mov r10, QWORD PTR [rcx] ; shift right by one: each limb takes its new top bit from the next limb up
|
|
|
- mov r11, QWORD PTR [rcx+8]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- mov r10, QWORD PTR [rcx+16]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+8], r11
|
|
|
- mov r11, QWORD PTR [rcx+24]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+16], r10
|
|
|
- mov r10, QWORD PTR [rcx+32]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+24], r11
|
|
|
- mov r11, QWORD PTR [rcx+40]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+32], r10
|
|
|
- mov r10, QWORD PTR [rcx+48]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+40], r11
|
|
|
- mov r11, QWORD PTR [rcx+56]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+48], r10
|
|
|
- mov r10, QWORD PTR [rcx+64]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+56], r11
|
|
|
- mov r11, QWORD PTR [rcx+72]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+64], r10
|
|
|
- mov r10, QWORD PTR [rcx+80]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+72], r11
|
|
|
- mov r11, QWORD PTR [rcx+88]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+80], r10
|
|
|
- mov r10, QWORD PTR [rcx+96]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+88], r11
|
|
|
- mov r11, QWORD PTR [rcx+104]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+96], r10
|
|
|
- mov r10, QWORD PTR [rcx+112]
|
|
|
- shrd r11, r10, 1
|
|
|
- mov QWORD PTR [rcx+104], r11
|
|
|
- mov r11, QWORD PTR [rcx+120]
|
|
|
- shrd r10, r11, 1
|
|
|
- mov QWORD PTR [rcx+112], r10
|
|
|
- shrd r11, r12, 1 ; the saved carry becomes the top bit of the top limb
|
|
|
- mov QWORD PTR [rcx+120], r11
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_mont_div2_avx2_16 ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-; /* Read big endian unsigned byte array into r.
|
|
|
-; * Uses the bswap instruction.
|
|
|
-; * Limbs above the converted bytes are zeroed up to r + 128 bytes.
|
|
|
-; * r A single precision integer (addressed through rcx).
|
|
|
-; * size Maximum number of bytes to convert (rdx; not read by this routine, the output end is fixed at r + 128).
|
|
|
-; * a Byte array (addressed through r8).
|
|
|
-; * n Number of bytes in array to read (r9, used as a signed 64-bit count).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_from_bin_bswap PROC
|
|
|
- push r12 ; r12 and r13 are used as scratch and restored before ret
|
|
|
- push r13
|
|
|
- mov r11, r8
|
|
|
- mov r12, rcx
|
|
|
- add r11, r9 ; r11 = a + n: one past the least significant input byte
|
|
|
- add r12, 128 ; r12 = end of the 128-byte output
|
|
|
- xor r13, r13 ; r13 = 0, used as the zero constant below
|
|
|
- jmp L_1024_from_bin_bswap_64_end
|
|
|
-L_1024_from_bin_bswap_64_start:
|
|
|
- sub r11, 64 ; take 64 bytes from the tail of a, emit 8 limbs, least significant first
|
|
|
- mov rax, QWORD PTR [r11+56]
|
|
|
- mov r10, QWORD PTR [r11+48]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- mov rax, QWORD PTR [r11+40]
|
|
|
- mov r10, QWORD PTR [r11+32]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- mov rax, QWORD PTR [r11+24]
|
|
|
- mov r10, QWORD PTR [r11+16]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- mov rax, QWORD PTR [r11+8]
|
|
|
- mov r10, QWORD PTR [r11]
|
|
|
- bswap rax
|
|
|
- bswap r10
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- add rcx, 64
|
|
|
- sub r9, 64
|
|
|
-L_1024_from_bin_bswap_64_end:
|
|
|
- cmp r9, 63
|
|
|
- jg L_1024_from_bin_bswap_64_start ; signed compare on the remaining byte count
|
|
|
- jmp L_1024_from_bin_bswap_8_end
|
|
|
-L_1024_from_bin_bswap_8_start:
|
|
|
- sub r11, 8 ; one limb (8 bytes) at a time
|
|
|
- mov rax, QWORD PTR [r11]
|
|
|
- bswap rax
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- add rcx, 8
|
|
|
- sub r9, 8
|
|
|
-L_1024_from_bin_bswap_8_end:
|
|
|
- cmp r9, 7
|
|
|
- jg L_1024_from_bin_bswap_8_start
|
|
|
- cmp r9, r13
|
|
|
- je L_1024_from_bin_bswap_hi_end ; no partial limb left
|
|
|
- mov r10, r13 ; r10 accumulates the partial top limb
|
|
|
- mov rax, r13 ; clear rax so only al carries data in the loop
|
|
|
-L_1024_from_bin_bswap_hi_start:
|
|
|
- mov al, BYTE PTR [r8] ; remaining bytes are the first ones of a (most significant)
|
|
|
- shl r10, 8
|
|
|
- inc r8
|
|
|
- add r10, rax
|
|
|
- dec r9
|
|
|
- jg L_1024_from_bin_bswap_hi_start
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- add rcx, 8
|
|
|
-L_1024_from_bin_bswap_hi_end:
|
|
|
- cmp rcx, r12
|
|
|
- jae L_1024_from_bin_bswap_zero_end ; unsigned compare: rcx and r12 are addresses
|
|
|
-L_1024_from_bin_bswap_zero_start:
|
|
|
- mov QWORD PTR [rcx], r13 ; zero the limbs that received no input
|
|
|
- add rcx, 8
|
|
|
- cmp rcx, r12
|
|
|
- jb L_1024_from_bin_bswap_zero_start ; unsigned compare: rcx and r12 are addresses
|
|
|
-L_1024_from_bin_bswap_zero_end:
|
|
|
- pop r13
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_from_bin_bswap ENDP
|
|
|
-_text ENDS
|
|
|
-IFNDEF NO_MOVBE_SUPPORT
|
|
|
-; /* Read big endian unsigned byte array into r.
|
|
|
-; * Uses the movbe instruction which is an optional instruction.
|
|
|
-; * Limbs above the converted bytes are zeroed up to r + 128 bytes.
|
|
|
-; * r A single precision integer (addressed through rcx).
|
|
|
-; * size Maximum number of bytes to convert (rdx; not read by this routine, the output end is fixed at r + 128).
|
|
|
-; * a Byte array (addressed through r8).
|
|
|
-; * n Number of bytes in array to read (r9, used as a signed 64-bit count).
|
|
|
-; */
|
|
|
-_text SEGMENT READONLY PARA
|
|
|
-sp_1024_from_bin_movbe PROC
|
|
|
- push r12 ; r12 is used as scratch and restored before ret
|
|
|
- mov r11, r8
|
|
|
- mov r12, rcx
|
|
|
- add r11, r9 ; r11 = a + n: one past the least significant input byte
|
|
|
- add r12, 128 ; r12 = end of the 128-byte output
|
|
|
- jmp L_1024_from_bin_movbe_64_end
|
|
|
-L_1024_from_bin_movbe_64_start:
|
|
|
- sub r11, 64 ; take 64 bytes from the tail of a, emit 8 limbs, least significant first
|
|
|
- movbe rax, QWORD PTR [r11+56]
|
|
|
- movbe r10, QWORD PTR [r11+48]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- mov QWORD PTR [rcx+8], r10
|
|
|
- movbe rax, QWORD PTR [r11+40]
|
|
|
- movbe r10, QWORD PTR [r11+32]
|
|
|
- mov QWORD PTR [rcx+16], rax
|
|
|
- mov QWORD PTR [rcx+24], r10
|
|
|
- movbe rax, QWORD PTR [r11+24]
|
|
|
- movbe r10, QWORD PTR [r11+16]
|
|
|
- mov QWORD PTR [rcx+32], rax
|
|
|
- mov QWORD PTR [rcx+40], r10
|
|
|
- movbe rax, QWORD PTR [r11+8]
|
|
|
- movbe r10, QWORD PTR [r11]
|
|
|
- mov QWORD PTR [rcx+48], rax
|
|
|
- mov QWORD PTR [rcx+56], r10
|
|
|
- add rcx, 64
|
|
|
- sub r9, 64
|
|
|
-L_1024_from_bin_movbe_64_end:
|
|
|
- cmp r9, 63
|
|
|
- jg L_1024_from_bin_movbe_64_start ; signed compare on the remaining byte count
|
|
|
- jmp L_1024_from_bin_movbe_8_end
|
|
|
-L_1024_from_bin_movbe_8_start:
|
|
|
- sub r11, 8 ; one limb (8 bytes) at a time
|
|
|
- movbe rax, QWORD PTR [r11]
|
|
|
- mov QWORD PTR [rcx], rax
|
|
|
- add rcx, 8
|
|
|
- sub r9, 8
|
|
|
-L_1024_from_bin_movbe_8_end:
|
|
|
- cmp r9, 7
|
|
|
- jg L_1024_from_bin_movbe_8_start
|
|
|
- test r9, r9
|
|
|
- je L_1024_from_bin_movbe_hi_end ; no partial limb left
|
|
|
- xor r10d, r10d ; r10 accumulates the partial top limb (32-bit xor clears all 64 bits)
|
|
|
- xor eax, eax ; clear rax so only al carries data in the loop
|
|
|
-L_1024_from_bin_movbe_hi_start:
|
|
|
- mov al, BYTE PTR [r8] ; remaining bytes are the first ones of a (most significant)
|
|
|
- shl r10, 8
|
|
|
- inc r8
|
|
|
- add r10, rax
|
|
|
- dec r9
|
|
|
- jg L_1024_from_bin_movbe_hi_start
|
|
|
- mov QWORD PTR [rcx], r10
|
|
|
- add rcx, 8
|
|
|
-L_1024_from_bin_movbe_hi_end:
|
|
|
- cmp rcx, r12
|
|
|
- jae L_1024_from_bin_movbe_zero_end ; unsigned compare: rcx and r12 are addresses
|
|
|
-L_1024_from_bin_movbe_zero_start:
|
|
|
- mov QWORD PTR [rcx], 0 ; zero the limbs that received no input
|
|
|
- add rcx, 8
|
|
|
- cmp rcx, r12
|
|
|
- jb L_1024_from_bin_movbe_zero_start ; unsigned compare: rcx and r12 are addresses
|
|
|
-L_1024_from_bin_movbe_zero_end:
|
|
|
- pop r12
|
|
|
- ret
|
|
|
-sp_1024_from_bin_movbe ENDP
|
|
|
-_text ENDS
|
|
|
-ENDIF
|
|
|
-ENDIF
|
|
|
-END
|