Based on Nekogram. Key additions:
- Rebrand to FoxiGram (app name, APK name, applicationId com.foxigram.app)
- Embedded Xray (VLESS+Reality) proxy client via JNI libxray.so
- Bundled hidden one-tap proxies (LTE + WiFi), read-only in UI
- Auto-restore proxy on restart, rebind to active network (LTE/WiFi)
- Server credentials externalized to git-ignored XrayServers.java (+ template)
- libxray Go source included; compiled .so, keystore, google-services.json ignored
3302 lines
67 KiB
NASM
3302 lines
67 KiB
NASM
; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

; Win64 only: the body of this conditional is skipped for every other NASM
; output format.
%ifidn __OUTPUT_FORMAT__, win64

; Bare [label] operands are assembled RIP-relative.
default rel

; The size keywords written in front of memory operands, and the _CET_ENDBR
; marker placed in each exported routine, are defined as empty here, so they
; expand to nothing when assembled.
%define XMMWORD
%define YMMWORD
%define ZMMWORD
%define _CET_ENDBR

; Pull in the prefix-symbol include only when BORINGSSL_PREFIX is defined.
%ifdef BORINGSSL_PREFIX
  %include "boringssl_prefix_symbols_nasm.inc"
%endif
|
|
section .rdata rdata align=8

; ---------------------------------------------------------------------------
; Read-only constants referenced by the routines in this file.
; Every entry below is exactly 16 bytes, so each label keeps the 16-byte
; alignment established by the ALIGN directive.
; ---------------------------------------------------------------------------
ALIGN   16

; Small integers in the low qword (high qword zero); used as vpaddd/vpsubd
; increments for the counter blocks.
one:
        DQ      1, 0
two:
        DQ      2, 0
three:
        DQ      3, 0
four:
        DQ      4, 0
five:
        DQ      5, 0
six:
        DQ      6, 0
seven:
        DQ      7, 0
eight:
        DQ      8, 0

; Only the top bit of the highest dword is set; OR'd into the initial
; counter block by the enc_msg/dec routines.
OR_MASK:
        DD      0x00000000, 0x00000000, 0x00000000, 0x80000000

; Second operand of the vpclmulqdq folding steps in GFMUL and the POLYVAL code.
poly:
        DQ      0x1, 0xc200000000000000

; vpshufb control word used by the key-schedule routines.
mask:
        times 4 DD 0x0c0f0e0d

; Initial per-round constant of the key schedules (shifted left by one bit
; after each use with vpslld).
con1:
        times 4 DD 1

; Replacement constant the AES-128 schedules load after their eighth round.
con2:
        times 4 DD 0x1b

; vpshufb control word used while expanding keys: the low 8 result bytes are
; zeroed (index -1), the high 8 bytes take source bytes 4..7 twice.
con3:
        times 8 DB -1
        DB      4, 5, 6, 7, 4, 5, 6, 7

; Clears the lowest dword and keeps the other three; used by aes128gcmsiv_kdf.
and_mask:
        DD      0, 0xffffffff, 0xffffffff, 0xffffffff
|
|
section .text code align=64

; ---------------------------------------------------------------------------
; GFMUL  (file-internal helper; not declared global)
;
;   In:       xmm0, xmm1   128-bit multiplicands
;   Out:      xmm0         128-bit result
;   Clobbers: xmm2, xmm3, xmm4, xmm5
;   Preserves xmm1, so a caller can invoke it repeatedly to keep multiplying
;   the running value in xmm0 by the same xmm1.
;
; Computes the 256-bit carry-less product of xmm0 and xmm1, then applies two
; folding steps against the constant at `poly` and combines the result with
; the high half of the product. Uses no stack and no general-purpose register.
; ---------------------------------------------------------------------------
ALIGN   16
GFMUL:
        ; Four 64x64 carry-less partial products.
        vpclmulqdq  xmm2, xmm0, xmm1, 0x00          ; low  qword * low  qword
        vpclmulqdq  xmm5, xmm0, xmm1, 0x11          ; high qword * high qword
        vpclmulqdq  xmm3, xmm0, xmm1, 0x10
        vpclmulqdq  xmm4, xmm0, xmm1, 0x01
        vpxor       xmm3, xmm3, xmm4                ; xmm3 = sum of the cross terms
        vpslldq     xmm4, xmm3, 8                   ; cross terms, low half -> bits 64..127
        vpsrldq     xmm3, xmm3, 8                   ; cross terms, high half -> bits 0..63
        vpxor       xmm2, xmm2, xmm4                ; xmm2 = low  128 bits of the product
        vpxor       xmm5, xmm5, xmm3                ; xmm5 = high 128 bits of the product

        ; First folding step.
        vpclmulqdq  xmm3, xmm2, XMMWORD[poly], 0x10
        vpshufd     xmm4, xmm2, 78                  ; 78 = 0x4E: swap the two qwords
        vpxor       xmm2, xmm3, xmm4

        ; Second folding step (same sequence again).
        vpclmulqdq  xmm3, xmm2, XMMWORD[poly], 0x10
        vpshufd     xmm4, xmm2, 78
        vpxor       xmm2, xmm3, xmm4

        vpxor       xmm0, xmm2, xmm5                ; fold in the high half -> result
        ret
|
|
|
|
|
|
; ---------------------------------------------------------------------------
; aesgcmsiv_htable_init
;
;   Win64 entry: arg1 (rcx) and arg2 (rdx) are moved into rdi / rsi; the
;   caller's rdi and rsi are parked at [rsp+8] / [rsp+16] and restored before
;   returning.
;
;   rdi = output table, 8 entries of 16 bytes (written with aligned stores)
;   rsi = 16-byte input value H (read with an aligned load)
;
;   Entry 0 is H itself; each following entry is the previous one passed
;   through GFMUL together with H (xmm1 holds H throughout).
;   Clobbers: rax, xmm0-xmm5.
; ---------------------------------------------------------------------------
global  aesgcmsiv_htable_init

ALIGN   16
aesgcmsiv_htable_init:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aesgcmsiv_htable_init:
        mov         rdi, rcx
        mov         rsi, rdx

        _CET_ENDBR
        vmovdqa     xmm0, XMMWORD[rsi]
        vmovdqa     xmm1, xmm0
        vmovdqa     XMMWORD[rdi], xmm0

        ; Entries 1..7 at byte offsets 16, 32, ..., 112 (unrolled by the
        ; preprocessor into seven call/store pairs).
%assign htable_init_off 16
%rep 7
        call        GFMUL
        vmovdqa     XMMWORD[rdi+htable_init_off], xmm0
%assign htable_init_off htable_init_off+16
%endrep
%undef htable_init_off

        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aesgcmsiv_htable_init:
|
|
; ---------------------------------------------------------------------------
; aesgcmsiv_htable6_init
;
;   Six-entry variant of the table builder: same register roles and the same
;   GFMUL chaining as the eight-entry routine, but only offsets 0..80 of the
;   output are written.
;
;   Win64 entry: arg1 (rcx) -> rdi = output table (6 x 16 bytes, aligned)
;                arg2 (rdx) -> rsi = 16-byte input value H (aligned)
;   Clobbers: rax, xmm0-xmm5.
; ---------------------------------------------------------------------------
global  aesgcmsiv_htable6_init

ALIGN   16
aesgcmsiv_htable6_init:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aesgcmsiv_htable6_init:
        mov         rdi, rcx
        mov         rsi, rdx

        _CET_ENDBR
        vmovdqa     xmm0, XMMWORD[rsi]
        vmovdqa     xmm1, xmm0
        vmovdqa     XMMWORD[rdi], xmm0

        ; Entries 1..5 at byte offsets 16, 32, ..., 80.
%assign htable6_init_off 16
%rep 5
        call        GFMUL
        vmovdqa     XMMWORD[rdi+htable6_init_off], xmm0
%assign htable6_init_off htable6_init_off+16
%endrep
%undef htable6_init_off

        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aesgcmsiv_htable6_init:
|
|
; ---------------------------------------------------------------------------
; aesgcmsiv_htable_polyval
;
;   Win64 entry: rcx, rdx, r8, r9 are moved into rdi, rsi, rdx, rcx.
;
;   rdi = table of 16-byte entries, read at offsets 0..112
;         (presumably the table written by aesgcmsiv_htable_init - verify
;         against the caller)
;   rsi = input bytes, read with unaligned loads
;   rdx = input length in bytes; the routine returns at once when it is 0
;   rcx = 16-byte accumulator: read with an aligned load, overwritten with the
;         result on exit
;
;   The length is split into a prefix of (rdx mod 128) bytes handled 16 bytes
;   at a time, followed by whole 128-byte groups in the main loop.
;   NOTE(review): the prefix loop counts r11 down by 16 and stops only when it
;   is exactly 0, so the length is assumed to be a multiple of 16 - confirm.
;
;   Working registers:
;     xmm0        current input block
;     xmm1, xmm9  running state carried between groups
;     xmm3/4/5    accumulated low / high / middle partial products
;     xmm6, xmm7  scratch
;
;   NOTE(review): vzeroall plus the use of xmm6, xmm7 and xmm9 overwrite
;   registers that the Microsoft x64 convention lists as callee-saved, and
;   nothing here saves or restores them.
; ---------------------------------------------------------------------------

; Multiply the block in xmm0 by the table entry at [rdi + %1] and add the four
; partial products into xmm5 (middle), xmm3 (low), xmm4 (high). Scratch: xmm6.
%macro HTABLE_POLYVAL_MAC 1
        vpclmulqdq  xmm6, xmm0, XMMWORD[rdi+%1], 0x01
        vpxor       xmm5, xmm5, xmm6
        vpclmulqdq  xmm6, xmm0, XMMWORD[rdi+%1], 0x00
        vpxor       xmm3, xmm3, xmm6
        vpclmulqdq  xmm6, xmm0, XMMWORD[rdi+%1], 0x11
        vpxor       xmm4, xmm4, xmm6
        vpclmulqdq  xmm6, xmm0, XMMWORD[rdi+%1], 0x10
        vpxor       xmm5, xmm5, xmm6
%endmacro

global  aesgcmsiv_htable_polyval

ALIGN   16
aesgcmsiv_htable_polyval:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aesgcmsiv_htable_polyval:
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8
        mov         rcx, r9

        _CET_ENDBR
        test        rdx, rdx
        jnz         NEAR $L$htable_polyval_start
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$htable_polyval_start:
        vzeroall

        ; r11 = length mod 128 = size of the prefix.
        mov         r11, rdx
        and         r11, 127
        jz          NEAR $L$htable_polyval_no_prefix

        vpxor       xmm9, xmm9, xmm9
        vmovdqa     xmm1, XMMWORD[rcx]
        sub         rdx, r11                        ; rdx = bytes left for the main loop

        sub         r11, 16                         ; r11 = table offset for the first block

        ; First prefix block: the incoming accumulator is folded into it.
        vmovdqu     xmm0, XMMWORD[rsi]
        vpxor       xmm0, xmm0, xmm1

        vpclmulqdq  xmm5, xmm0, XMMWORD[r11*1+rdi], 0x01
        vpclmulqdq  xmm3, xmm0, XMMWORD[r11*1+rdi], 0x00
        vpclmulqdq  xmm4, xmm0, XMMWORD[r11*1+rdi], 0x11
        vpclmulqdq  xmm6, xmm0, XMMWORD[r11*1+rdi], 0x10
        vpxor       xmm5, xmm5, xmm6

        lea         rsi, [rsi+16]                   ; lea keeps the flags untouched
        test        r11, r11
        jnz         NEAR $L$htable_polyval_prefix_loop
        jmp         NEAR $L$htable_polyval_prefix_complete

ALIGN   64
$L$htable_polyval_prefix_loop:
        sub         r11, 16

        vmovdqu     xmm0, XMMWORD[rsi]

        vpclmulqdq  xmm6, xmm0, XMMWORD[r11*1+rdi], 0x00
        vpxor       xmm3, xmm3, xmm6
        vpclmulqdq  xmm6, xmm0, XMMWORD[r11*1+rdi], 0x11
        vpxor       xmm4, xmm4, xmm6
        vpclmulqdq  xmm6, xmm0, XMMWORD[r11*1+rdi], 0x01
        vpxor       xmm5, xmm5, xmm6
        vpclmulqdq  xmm6, xmm0, XMMWORD[r11*1+rdi], 0x10
        vpxor       xmm5, xmm5, xmm6

        test        r11, r11
        lea         rsi, [rsi+16]                   ; flags from `test` survive the lea
        jnz         NEAR $L$htable_polyval_prefix_loop

$L$htable_polyval_prefix_complete:
        ; Split the middle term across the high and low accumulators.
        vpsrldq     xmm6, xmm5, 8
        vpslldq     xmm5, xmm5, 8

        vpxor       xmm9, xmm4, xmm6
        vpxor       xmm1, xmm3, xmm5

        jmp         NEAR $L$htable_polyval_main_loop

$L$htable_polyval_no_prefix:
        ; No prefix: start with xmm1 = 0 and xmm9 = incoming accumulator.
        vpxor       xmm1, xmm1, xmm1
        vmovdqa     xmm9, XMMWORD[rcx]

ALIGN   64
$L$htable_polyval_main_loop:
        sub         rdx, 0x80
        jb          NEAR $L$htable_polyval_out      ; fewer than 128 bytes were left

        ; Block 7 (offset 112) starts fresh accumulators with table entry 0.
        vmovdqu     xmm0, XMMWORD[rsi+112]

        vpclmulqdq  xmm5, xmm0, XMMWORD[rdi], 0x01
        vpclmulqdq  xmm3, xmm0, XMMWORD[rdi], 0x00
        vpclmulqdq  xmm4, xmm0, XMMWORD[rdi], 0x11
        vpclmulqdq  xmm6, xmm0, XMMWORD[rdi], 0x10
        vpxor       xmm5, xmm5, xmm6

        vmovdqu     xmm0, XMMWORD[rsi+96]
        HTABLE_POLYVAL_MAC 16

        ; First folding step of the previous state, interleaved with block 5.
        vmovdqu     xmm0, XMMWORD[rsi+80]

        vpclmulqdq  xmm7, xmm1, XMMWORD[poly], 0x10
        vpalignr    xmm1, xmm1, xmm1, 8

        HTABLE_POLYVAL_MAC 32

        vpxor       xmm1, xmm1, xmm7

        vmovdqu     xmm0, XMMWORD[rsi+64]
        HTABLE_POLYVAL_MAC 48

        ; Second folding step, interleaved with block 3.
        vmovdqu     xmm0, XMMWORD[rsi+48]

        vpclmulqdq  xmm7, xmm1, XMMWORD[poly], 0x10
        vpalignr    xmm1, xmm1, xmm1, 8

        HTABLE_POLYVAL_MAC 64

        vpxor       xmm1, xmm1, xmm7

        vmovdqu     xmm0, XMMWORD[rsi+32]
        HTABLE_POLYVAL_MAC 80

        vpxor       xmm1, xmm1, xmm9                ; previous state is now complete

        vmovdqu     xmm0, XMMWORD[rsi+16]
        HTABLE_POLYVAL_MAC 96

        ; Block 0 absorbs the previous state before being multiplied.
        vmovdqu     xmm0, XMMWORD[rsi]
        vpxor       xmm0, xmm0, xmm1
        HTABLE_POLYVAL_MAC 112

        ; Split the middle term across the high and low accumulators.
        vpsrldq     xmm6, xmm5, 8
        vpslldq     xmm5, xmm5, 8

        vpxor       xmm9, xmm4, xmm6
        vpxor       xmm1, xmm3, xmm5

        lea         rsi, [rsi+128]
        jmp         NEAR $L$htable_polyval_main_loop

$L$htable_polyval_out:
        ; Two folding steps on the pending state, then combine with xmm9.
        vpclmulqdq  xmm6, xmm1, XMMWORD[poly], 0x10
        vpalignr    xmm1, xmm1, xmm1, 8
        vpxor       xmm1, xmm1, xmm6

        vpclmulqdq  xmm6, xmm1, XMMWORD[poly], 0x10
        vpalignr    xmm1, xmm1, xmm1, 8
        vpxor       xmm1, xmm1, xmm6
        vpxor       xmm1, xmm1, xmm9

        vmovdqu     XMMWORD[rcx], xmm1
        vzeroupper
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aesgcmsiv_htable_polyval:
%unmacro HTABLE_POLYVAL_MAC 1
|
|
; ---------------------------------------------------------------------------
; aesgcmsiv_polyval_horner
;
;   Win64 entry: rcx, rdx, r8, r9 are moved into rdi, rsi, rdx, rcx.
;
;   rdi = 16-byte accumulator (aligned; read at entry, rewritten at exit)
;   rsi = 16-byte multiplier H (aligned)
;   rdx = input blocks
;   rcx = number of 16-byte blocks; the routine returns at once when it is 0
;
;   For every block: accumulator = GFMUL(accumulator XOR block, H).
;   Clobbers: rax, r10, xmm0-xmm5 (xmm2-xmm5 through GFMUL).
; ---------------------------------------------------------------------------
global  aesgcmsiv_polyval_horner

ALIGN   16
aesgcmsiv_polyval_horner:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aesgcmsiv_polyval_horner:
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8
        mov         rcx, r9

        _CET_ENDBR
        test        rcx, rcx
        jnz         NEAR $L$polyval_horner_start
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$polyval_horner_start:
        xor         r10, r10                        ; r10 = byte offset into the input
        shl         rcx, 4                          ; rcx = total input length in bytes

        vmovdqa     xmm1, XMMWORD[rsi]              ; xmm1 = H, left intact by GFMUL
        vmovdqa     xmm0, XMMWORD[rdi]              ; xmm0 = running accumulator

$L$polyval_horner_loop:
        vpxor       xmm0, xmm0, XMMWORD[r10*1+rdx]
        call        GFMUL

        add         r10, 16
        cmp         rcx, r10
        jne         NEAR $L$polyval_horner_loop

        vmovdqa     XMMWORD[rdi], xmm0
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aesgcmsiv_polyval_horner:
|
|
; ---------------------------------------------------------------------------
; aes128gcmsiv_aes_ks
;
;   Win64 entry: arg1 (rcx) -> rdi, arg2 (rdx) -> rsi.
;
;   rdi = 16-byte key (unaligned load)
;   rsi = output key schedule, 11 round keys = 176 bytes (aligned stores)
;
;   Register roles:
;     xmm0  round constant (con1, shifted left each round; con2 for the tail)
;     xmm1  most recent round key
;     xmm2, xmm3  scratch
;     xmm4  vpshufb control word loaded from `mask`
;
;   The shuffle mask is held in xmm4 rather than xmm15: xmm6-xmm15 are
;   callee-saved under the Microsoft x64 convention and this routine has no
;   save area for them, whereas xmm0-xmm5 are volatile.
;   Clobbers: rax, xmm0-xmm4, flags.
; ---------------------------------------------------------------------------
global  aes128gcmsiv_aes_ks

ALIGN   16
aes128gcmsiv_aes_ks:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aes128gcmsiv_aes_ks:
        mov         rdi, rcx
        mov         rsi, rdx

        _CET_ENDBR
        vmovdqu     xmm1, XMMWORD[rdi]
        vmovdqa     XMMWORD[rsi], xmm1              ; round key 0 = the key itself

        vmovdqa     xmm0, XMMWORD[con1]
        vmovdqa     xmm4, XMMWORD[mask]

        mov         rax, 8                          ; round keys 1..8 come from the loop

$L$ks128_loop:
        add         rsi, 16
        sub         rax, 1                          ; sets ZF for the jne below; the
                                                    ; vector ops in between leave flags alone
        vpshufb     xmm2, xmm1, xmm4
        vaesenclast xmm2, xmm2, xmm0
        vpslld      xmm0, xmm0, 1                   ; next round constant
        vpslldq     xmm3, xmm1, 4
        vpxor       xmm1, xmm1, xmm3
        vpslldq     xmm3, xmm3, 4
        vpxor       xmm1, xmm1, xmm3
        vpslldq     xmm3, xmm3, 4
        vpxor       xmm1, xmm1, xmm3
        vpxor       xmm1, xmm1, xmm2
        vmovdqa     XMMWORD[rsi], xmm1
        jne         NEAR $L$ks128_loop

        ; Round key 9: same step with the constant restarted from con2.
        vmovdqa     xmm0, XMMWORD[con2]
        vpshufb     xmm2, xmm1, xmm4
        vaesenclast xmm2, xmm2, xmm0
        vpslld      xmm0, xmm0, 1
        vpslldq     xmm3, xmm1, 4
        vpxor       xmm1, xmm1, xmm3
        vpslldq     xmm3, xmm3, 4
        vpxor       xmm1, xmm1, xmm3
        vpslldq     xmm3, xmm3, 4
        vpxor       xmm1, xmm1, xmm3
        vpxor       xmm1, xmm1, xmm2
        vmovdqa     XMMWORD[rsi+16], xmm1

        ; Round key 10: last one, so the constant is not advanced again.
        vpshufb     xmm2, xmm1, xmm4
        vaesenclast xmm2, xmm2, xmm0
        vpslldq     xmm3, xmm1, 4
        vpxor       xmm1, xmm1, xmm3
        vpslldq     xmm3, xmm3, 4
        vpxor       xmm1, xmm1, xmm3
        vpslldq     xmm3, xmm3, 4
        vpxor       xmm1, xmm1, xmm3
        vpxor       xmm1, xmm1, xmm2
        vmovdqa     XMMWORD[rsi+32], xmm1
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aes128gcmsiv_aes_ks:
|
|
; ---------------------------------------------------------------------------
; aes256gcmsiv_aes_ks
;
;   Win64 entry: arg1 (rcx) -> rdi, arg2 (rdx) -> rsi.
;
;   rdi = 32-byte key (two unaligned loads)
;   rsi = output key schedule, 15 round keys = 240 bytes (aligned stores)
;
;   Register roles:
;     xmm0  round constant (con1, shifted left each iteration)
;     xmm1  latest even-numbered round key
;     xmm3  latest odd-numbered round key
;     xmm2, xmm4  scratch
;     xmm5  all-zero round key for the second vaesenclast of each iteration
;
;   Only volatile registers (xmm0-xmm5) are used: the zero value sits in xmm5
;   and the shuffle control word is read straight from `mask` as a memory
;   operand. Keeping them in xmm14 / xmm15 would overwrite registers that are
;   callee-saved under the Microsoft x64 convention, and this routine has no
;   save area for them.
;   Clobbers: rax, xmm0-xmm5, flags.
;
;   Unlike its siblings, no $L$SEH_end label follows this routine.
; ---------------------------------------------------------------------------
global  aes256gcmsiv_aes_ks

ALIGN   16
aes256gcmsiv_aes_ks:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aes256gcmsiv_aes_ks:
        mov         rdi, rcx
        mov         rsi, rdx

        _CET_ENDBR
        vmovdqu     xmm1, XMMWORD[rdi]
        vmovdqu     xmm3, XMMWORD[rdi+16]
        vmovdqa     XMMWORD[rsi], xmm1              ; round keys 0 and 1 = the key itself
        vmovdqa     XMMWORD[rsi+16], xmm3
        vmovdqa     xmm0, XMMWORD[con1]
        vpxor       xmm5, xmm5, xmm5
        mov         rax, 6                          ; six iterations, two round keys each

$L$ks256_loop:
        add         rsi, 32
        sub         rax, 1                          ; sets ZF for the jne below; the
                                                    ; vector ops in between leave flags alone
        ; Even round key, derived from xmm3 and the round constant.
        vpshufb     xmm2, xmm3, XMMWORD[mask]
        vaesenclast xmm2, xmm2, xmm0
        vpslld      xmm0, xmm0, 1                   ; next round constant
        vpsllq      xmm4, xmm1, 32
        vpxor       xmm1, xmm1, xmm4
        vpshufb     xmm4, xmm1, XMMWORD[con3]
        vpxor       xmm1, xmm1, xmm4
        vpxor       xmm1, xmm1, xmm2
        vmovdqa     XMMWORD[rsi], xmm1
        ; Odd round key, derived from the key just produced (zero round key).
        vpshufd     xmm2, xmm1, 0xff                ; broadcast the top dword
        vaesenclast xmm2, xmm2, xmm5
        vpsllq      xmm4, xmm3, 32
        vpxor       xmm3, xmm3, xmm4
        vpshufb     xmm4, xmm3, XMMWORD[con3]
        vpxor       xmm3, xmm3, xmm4
        vpxor       xmm3, xmm3, xmm2
        vmovdqa     XMMWORD[rsi+16], xmm3
        jne         NEAR $L$ks256_loop

        ; Final (15th) round key.
        vpshufb     xmm2, xmm3, XMMWORD[mask]
        vaesenclast xmm2, xmm2, xmm0
        vpsllq      xmm4, xmm1, 32
        vpxor       xmm1, xmm1, xmm4
        vpshufb     xmm4, xmm1, XMMWORD[con3]
        vpxor       xmm1, xmm1, xmm4
        vpxor       xmm1, xmm1, xmm2
        vmovdqa     XMMWORD[rsi+32], xmm1
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret
|
|
|
|
; ---------------------------------------------------------------------------
; aes128gcmsiv_aes_ks_enc_x1
;
;   Expands a 16-byte key into 11 round keys and, in the same pass, encrypts
;   one 16-byte block with them.
;
;   Win64 entry: rcx, rdx, r8, r9 are moved into rdi, rsi, rdx, rcx.
;
;   rdi = input block (aligned load)
;   rsi = output block (aligned store)
;   rdx = output key schedule, 176 bytes (aligned stores)
;   rcx = 16-byte key (aligned load)
;
;   Register roles:
;     xmm0  round constant (con1, shifted left each round; con2 for the tail)
;     xmm1  most recent round key
;     xmm2, xmm3  scratch
;     xmm4  block being encrypted
;     xmm5  vpshufb control word loaded from `mask`
;
;   The shuffle mask is held in xmm5 rather than xmm15: xmm6-xmm15 are
;   callee-saved under the Microsoft x64 convention and this routine has no
;   save area for them, whereas xmm0-xmm5 are volatile.
;   Clobbers: rax, xmm0-xmm5.
; ---------------------------------------------------------------------------

; One combined step: derive the next round key into xmm1, advance the round
; constant, run one AES round on the block in xmm4 and store the new key at
; [rdx + %1].
%macro KS_ENC_X1_ROUND 1
        vpshufb     xmm2, xmm1, xmm5
        vaesenclast xmm2, xmm2, xmm0
        vpslld      xmm0, xmm0, 1
        vpsllq      xmm3, xmm1, 32
        vpxor       xmm1, xmm1, xmm3
        vpshufb     xmm3, xmm1, XMMWORD[con3]
        vpxor       xmm1, xmm1, xmm3
        vpxor       xmm1, xmm1, xmm2

        vaesenc     xmm4, xmm4, xmm1
        vmovdqa     XMMWORD[rdx+%1], xmm1
%endmacro

global  aes128gcmsiv_aes_ks_enc_x1

ALIGN   16
aes128gcmsiv_aes_ks_enc_x1:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1:
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8
        mov         rcx, r9

        _CET_ENDBR
        vmovdqa     xmm1, XMMWORD[rcx]
        vmovdqa     xmm4, XMMWORD[rdi]

        vmovdqa     XMMWORD[rdx], xmm1              ; round key 0 = the key itself
        vpxor       xmm4, xmm4, xmm1                ; initial whitening of the block

        vmovdqa     xmm0, XMMWORD[con1]
        vmovdqa     xmm5, XMMWORD[mask]

        ; Rounds 1..8: round keys stored at offsets 16, 32, ..., 128.
%assign ks_enc_x1_off 16
%rep 8
        KS_ENC_X1_ROUND ks_enc_x1_off
%assign ks_enc_x1_off ks_enc_x1_off+16
%endrep
%undef ks_enc_x1_off

        ; Round 9: the constant is restarted from con2.
        vmovdqa     xmm0, XMMWORD[con2]
        KS_ENC_X1_ROUND 144

        ; Round 10: last round key; the constant is not advanced and the
        ; block takes the final AES round.
        vpshufb     xmm2, xmm1, xmm5
        vaesenclast xmm2, xmm2, xmm0
        vpsllq      xmm3, xmm1, 32
        vpxor       xmm1, xmm1, xmm3
        vpshufb     xmm3, xmm1, XMMWORD[con3]
        vpxor       xmm1, xmm1, xmm3
        vpxor       xmm1, xmm1, xmm2

        vaesenclast xmm4, xmm4, xmm1
        vmovdqa     XMMWORD[rdx+160], xmm1

        vmovdqa     XMMWORD[rsi], xmm4
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aes128gcmsiv_aes_ks_enc_x1:
%unmacro KS_ENC_X1_ROUND 1
|
|
; ---------------------------------------------------------------------------
; aes128gcmsiv_kdf
;
;   Win64 entry: rcx, rdx, r8 are moved into rdi, rsi, rdx.
;
;   rdi = 16-byte nonce block (aligned load)
;   rsi = output, 4 x 16 bytes (aligned stores)
;   rdx = AES-128 key schedule, 11 round keys (aligned loads)
;
;   Builds four input blocks and encrypts each with the full 10-round
;   schedule:
;     block0 = nonce dwords 0..2 moved up to dwords 1..3, dword 0 cleared
;     block1..3 = block0 with dword 0 set to 1, 2, 3
;
;   Register roles:
;     xmm0, xmm3, xmm4, xmm5  the four blocks (stored to rsi+0/16/32/48)
;     xmm1                    current round key
;     xmm2                    the constant `one` while the blocks are built
;
;   Only volatile registers (xmm0-xmm5) are used; `and_mask` is applied as a
;   memory operand. Holding the blocks and constants in xmm9-xmm13 would
;   overwrite registers that are callee-saved under the Microsoft x64
;   convention, and this routine has no save area for them.
;   Clobbers: rax, xmm0-xmm5.
; ---------------------------------------------------------------------------
global  aes128gcmsiv_kdf

ALIGN   16
aes128gcmsiv_kdf:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aes128gcmsiv_kdf:
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8

        _CET_ENDBR
        vmovdqa     xmm1, XMMWORD[rdx]              ; round key 0
        vmovdqa     xmm0, XMMWORD[rdi]
        vmovdqa     xmm2, XMMWORD[one]
        vpshufd     xmm0, xmm0, 0x90                ; dwords (d0,d1,d2,d3) <- (s0,s0,s1,s2)
        vpand       xmm0, xmm0, XMMWORD[and_mask]   ; clear dword 0 -> counter value 0
        vpaddd      xmm3, xmm0, xmm2                ; counter value 1
        vpaddd      xmm4, xmm3, xmm2                ; counter value 2
        vpaddd      xmm5, xmm4, xmm2                ; counter value 3

        vpxor       xmm0, xmm0, xmm1
        vpxor       xmm3, xmm3, xmm1
        vpxor       xmm4, xmm4, xmm1
        vpxor       xmm5, xmm5, xmm1

        ; Rounds 1..9 with the round keys at offsets 16, 32, ..., 144.
%assign kdf128_rk_off 16
%rep 9
        vmovdqa     xmm1, XMMWORD[rdx+kdf128_rk_off]
        vaesenc     xmm0, xmm0, xmm1
        vaesenc     xmm3, xmm3, xmm1
        vaesenc     xmm4, xmm4, xmm1
        vaesenc     xmm5, xmm5, xmm1
%assign kdf128_rk_off kdf128_rk_off+16
%endrep
%undef kdf128_rk_off

        ; Final round.
        vmovdqa     xmm1, XMMWORD[rdx+160]
        vaesenclast xmm0, xmm0, xmm1
        vaesenclast xmm3, xmm3, xmm1
        vaesenclast xmm4, xmm4, xmm1
        vaesenclast xmm5, xmm5, xmm1

        vmovdqa     XMMWORD[rsi], xmm0
        vmovdqa     XMMWORD[rsi+16], xmm3
        vmovdqa     XMMWORD[rsi+32], xmm4
        vmovdqa     XMMWORD[rsi+48], xmm5
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aes128gcmsiv_kdf:
|
|
; ---------------------------------------------------------------------------
; aes128gcmsiv_enc_msg_x4
;
;   Win64 entry: rcx, rdx, r8, r9 are moved into rdi, rsi, rdx, rcx and the
;   fifth argument is fetched from [rsp+40] into r8.
;
;   rdi = input  (unaligned loads)
;   rsi = output (unaligned stores)
;   rdx = 16-byte initial counter block (aligned load); OR_MASK is OR'd in
;   rcx = AES-128 key schedule, 11 round keys
;   r8  = length in bytes; the routine returns at once when it is 0. Only
;         whole 16-byte blocks are processed (r8 >> 4).
;
;   Counter-mode style processing: each counter block is encrypted and XORed
;   with 16 bytes of input. Blocks are handled four at a time in loop1, then
;   the remaining (blocks mod 4) one at a time in loop2.
;
;   Register roles:
;     xmm0-xmm3  the four running counters (ctr, ctr+1, ctr+2, ctr+3)
;     xmm4       the constant `four`
;     xmm5-xmm8  blocks being encrypted
;     xmm12      current round key
;     xmm15      initial counter block
;
;   NOTE(review): xmm6, xmm7, xmm8, xmm12 and xmm15 are callee-saved under the
;   Microsoft x64 convention and are overwritten here without being saved.
;   r12 and r13 are pushed and popped although the routine never uses them.
; ---------------------------------------------------------------------------

; One AES round on xmm5..xmm8 with the round key at [rcx + %1].
%macro ENC_MSG_X4_ROUND 1
        vmovdqu     xmm12, XMMWORD[rcx+%1]
        vaesenc     xmm5, xmm5, xmm12
        vaesenc     xmm6, xmm6, xmm12
        vaesenc     xmm7, xmm7, xmm12
        vaesenc     xmm8, xmm8, xmm12
%endmacro

global  aes128gcmsiv_enc_msg_x4

ALIGN   16
aes128gcmsiv_enc_msg_x4:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aes128gcmsiv_enc_msg_x4:
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8
        mov         rcx, r9
        mov         r8, QWORD[rsp+40]

        _CET_ENDBR
        test        r8, r8
        jnz         NEAR $L$128_enc_msg_x4_start
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$128_enc_msg_x4_start:
        push        r12
        push        r13

        shr         r8, 4                           ; r8  = number of 16-byte blocks
        mov         r10, r8
        shl         r10, 62
        shr         r10, 62                         ; r10 = blocks mod 4

        vmovdqa     xmm15, XMMWORD[rdx]
        vpor        xmm15, xmm15, XMMWORD[OR_MASK]

        vmovdqu     xmm4, XMMWORD[four]
        vmovdqa     xmm0, xmm15
        vpaddd      xmm1, xmm15, XMMWORD[one]
        vpaddd      xmm2, xmm15, XMMWORD[two]
        vpaddd      xmm3, xmm15, XMMWORD[three]

        shr         r8, 2                           ; r8 = number of 4-block groups
        je          NEAR $L$128_enc_msg_x4_check_remainder

        ; Pre-bias the pointers: the loop advances them at its top.
        sub         rsi, 64
        sub         rdi, 64

$L$128_enc_msg_x4_loop1:
        add         rsi, 64
        add         rdi, 64

        vmovdqa     xmm5, xmm0
        vmovdqa     xmm6, xmm1
        vmovdqa     xmm7, xmm2
        vmovdqa     xmm8, xmm3

        ; Round 0: XOR with the first round key.
        vpxor       xmm5, xmm5, XMMWORD[rcx]
        vpxor       xmm6, xmm6, XMMWORD[rcx]
        vpxor       xmm7, xmm7, XMMWORD[rcx]
        vpxor       xmm8, xmm8, XMMWORD[rcx]

        ; Rounds 1..4, each followed by one counter bump for the next group.
        ENC_MSG_X4_ROUND 16
        vpaddd      xmm0, xmm0, xmm4

        ENC_MSG_X4_ROUND 32
        vpaddd      xmm1, xmm1, xmm4

        ENC_MSG_X4_ROUND 48
        vpaddd      xmm2, xmm2, xmm4

        ENC_MSG_X4_ROUND 64
        vpaddd      xmm3, xmm3, xmm4

        ; Rounds 5..9.
        ENC_MSG_X4_ROUND 80
        ENC_MSG_X4_ROUND 96
        ENC_MSG_X4_ROUND 112
        ENC_MSG_X4_ROUND 128
        ENC_MSG_X4_ROUND 144

        ; Final round.
        vmovdqu     xmm12, XMMWORD[rcx+160]
        vaesenclast xmm5, xmm5, xmm12
        vaesenclast xmm6, xmm6, xmm12
        vaesenclast xmm7, xmm7, xmm12
        vaesenclast xmm8, xmm8, xmm12

        ; XOR the keystream with the input.
        vpxor       xmm5, xmm5, XMMWORD[rdi]
        vpxor       xmm6, xmm6, XMMWORD[rdi+16]
        vpxor       xmm7, xmm7, XMMWORD[rdi+32]
        vpxor       xmm8, xmm8, XMMWORD[rdi+48]

        sub         r8, 1                           ; sets ZF for the jne below; the
                                                    ; stores in between leave flags alone
        vmovdqu     XMMWORD[rsi], xmm5
        vmovdqu     XMMWORD[rsi+16], xmm6
        vmovdqu     XMMWORD[rsi+32], xmm7
        vmovdqu     XMMWORD[rsi+48], xmm8

        jne         NEAR $L$128_enc_msg_x4_loop1

        ; Step past the last group that was processed.
        add         rsi, 64
        add         rdi, 64

$L$128_enc_msg_x4_check_remainder:
        cmp         r10, 0
        je          NEAR $L$128_enc_msg_x4_out

$L$128_enc_msg_x4_loop2:
        ; One block at a time; xmm0 holds the next counter value.
        vmovdqa     xmm5, xmm0
        vpaddd      xmm0, xmm0, XMMWORD[one]

        vpxor       xmm5, xmm5, XMMWORD[rcx]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+16]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+32]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+48]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+64]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+80]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+96]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+112]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+128]
        vaesenc     xmm5, xmm5, XMMWORD[rcx+144]
        vaesenclast xmm5, xmm5, XMMWORD[rcx+160]

        vpxor       xmm5, xmm5, XMMWORD[rdi]
        vmovdqu     XMMWORD[rsi], xmm5

        add         rdi, 16
        add         rsi, 16

        sub         r10, 1
        jne         NEAR $L$128_enc_msg_x4_loop2

$L$128_enc_msg_x4_out:
        pop         r13
        pop         r12
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aes128gcmsiv_enc_msg_x4:
%unmacro ENC_MSG_X4_ROUND 1
|
|
; ---------------------------------------------------------------------------
; aes128gcmsiv_enc_msg_x8
;
;   Win64 entry: rcx, rdx, r8, r9 are moved into rdi, rsi, rdx, rcx and the
;   fifth argument is fetched from [rsp+40] into r8.
;
;   rdi = input  (unaligned loads)
;   rsi = output (unaligned stores)
;   rdx = 16-byte initial counter block (unaligned load); OR_MASK is OR'd in
;   rcx = AES-128 key schedule, 11 round keys
;   r8  = length in bytes; the routine returns at once when it is 0. Only
;         whole 16-byte blocks are processed (r8 >> 4).
;
;   Same scheme as the four-block routine, eight blocks per iteration of
;   loop1, then the remaining (blocks mod 8) one at a time in loop2.
;
;   Stack: r12, r13 and rbp are pushed, rbp keeps the old rsp, and a 64-byte
;   aligned scratch area is carved out; [rsp] holds the eighth counter
;   (ctr+7) because the vector registers are all taken.
;
;   Register roles:
;     xmm0, xmm9-xmm14  running counters ctr, ctr+1 .. ctr+6
;     xmm1-xmm8         blocks being encrypted
;     xmm15             current round key
;
;   NOTE(review): xmm6-xmm15 are callee-saved under the Microsoft x64
;   convention and are overwritten here without being saved. r12 and r13 are
;   pushed and popped although the routine never uses them.
; ---------------------------------------------------------------------------

; One AES round on xmm1..xmm8 with the round key at [rcx + %1].
%macro ENC_MSG_X8_ROUND 1
        vmovdqu     xmm15, XMMWORD[rcx+%1]
        vaesenc     xmm1, xmm1, xmm15
        vaesenc     xmm2, xmm2, xmm15
        vaesenc     xmm3, xmm3, xmm15
        vaesenc     xmm4, xmm4, xmm15
        vaesenc     xmm5, xmm5, xmm15
        vaesenc     xmm6, xmm6, xmm15
        vaesenc     xmm7, xmm7, xmm15
        vaesenc     xmm8, xmm8, xmm15
%endmacro

global  aes128gcmsiv_enc_msg_x8

ALIGN   16
aes128gcmsiv_enc_msg_x8:
        mov         QWORD[rsp+8], rdi               ;WIN64 prologue
        mov         QWORD[rsp+16], rsi
        mov         rax, rsp
$L$SEH_begin_aes128gcmsiv_enc_msg_x8:
        mov         rdi, rcx
        mov         rsi, rdx
        mov         rdx, r8
        mov         rcx, r9
        mov         r8, QWORD[rsp+40]

        _CET_ENDBR
        test        r8, r8
        jnz         NEAR $L$128_enc_msg_x8_start
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$128_enc_msg_x8_start:
        push        r12
        push        r13
        push        rbp
        mov         rbp, rsp

        ; Scratch area for the eighth counter, aligned down to 64 bytes.
        sub         rsp, 128
        and         rsp, -64

        shr         r8, 4                           ; r8  = number of 16-byte blocks
        mov         r10, r8
        shl         r10, 61
        shr         r10, 61                         ; r10 = blocks mod 8

        vmovdqu     xmm1, XMMWORD[rdx]
        vpor        xmm1, xmm1, XMMWORD[OR_MASK]

        ; ctr+7 goes to the stack slot, ctr+1..ctr+6 to xmm9..xmm14, ctr to xmm0.
        vpaddd      xmm0, xmm1, XMMWORD[seven]
        vmovdqu     XMMWORD[rsp], xmm0
        vpaddd      xmm9, xmm1, XMMWORD[one]
        vpaddd      xmm10, xmm1, XMMWORD[two]
        vpaddd      xmm11, xmm1, XMMWORD[three]
        vpaddd      xmm12, xmm1, XMMWORD[four]
        vpaddd      xmm13, xmm1, XMMWORD[five]
        vpaddd      xmm14, xmm1, XMMWORD[six]
        vmovdqa     xmm0, xmm1

        shr         r8, 3                           ; r8 = number of 8-block groups
        je          NEAR $L$128_enc_msg_x8_check_remainder

        ; Pre-bias the pointers: the loop advances them at its top.
        sub         rsi, 128
        sub         rdi, 128

$L$128_enc_msg_x8_loop1:
        add         rsi, 128
        add         rdi, 128

        vmovdqa     xmm1, xmm0
        vmovdqa     xmm2, xmm9
        vmovdqa     xmm3, xmm10
        vmovdqa     xmm4, xmm11
        vmovdqa     xmm5, xmm12
        vmovdqa     xmm6, xmm13
        vmovdqa     xmm7, xmm14

        vmovdqu     xmm8, XMMWORD[rsp]              ; eighth counter from the stack slot

        ; Round 0: XOR with the first round key.
        vpxor       xmm1, xmm1, XMMWORD[rcx]
        vpxor       xmm2, xmm2, XMMWORD[rcx]
        vpxor       xmm3, xmm3, XMMWORD[rcx]
        vpxor       xmm4, xmm4, XMMWORD[rcx]
        vpxor       xmm5, xmm5, XMMWORD[rcx]
        vpxor       xmm6, xmm6, XMMWORD[rcx]
        vpxor       xmm7, xmm7, XMMWORD[rcx]
        vpxor       xmm8, xmm8, XMMWORD[rcx]

        ENC_MSG_X8_ROUND 16

        ; Advance the stack-held counter by 8; xmm14 is free to reuse because
        ; its value was already copied into xmm7.
        vmovdqu     xmm14, XMMWORD[rsp]
        vpaddd      xmm14, xmm14, XMMWORD[eight]
        vmovdqu     XMMWORD[rsp], xmm14
        ENC_MSG_X8_ROUND 32

        ; (ctr+7+8) - 1 = next group's ctr+6.
        vpsubd      xmm14, xmm14, XMMWORD[one]
        ENC_MSG_X8_ROUND 48

        ; Remaining counters are bumped one per round.
        vpaddd      xmm0, xmm0, XMMWORD[eight]
        ENC_MSG_X8_ROUND 64

        vpaddd      xmm9, xmm9, XMMWORD[eight]
        ENC_MSG_X8_ROUND 80

        vpaddd      xmm10, xmm10, XMMWORD[eight]
        ENC_MSG_X8_ROUND 96

        vpaddd      xmm11, xmm11, XMMWORD[eight]
        ENC_MSG_X8_ROUND 112

        vpaddd      xmm12, xmm12, XMMWORD[eight]
        ENC_MSG_X8_ROUND 128

        vpaddd      xmm13, xmm13, XMMWORD[eight]
        ENC_MSG_X8_ROUND 144

        ; Final round.
        vmovdqu     xmm15, XMMWORD[rcx+160]
        vaesenclast xmm1, xmm1, xmm15
        vaesenclast xmm2, xmm2, xmm15
        vaesenclast xmm3, xmm3, xmm15
        vaesenclast xmm4, xmm4, xmm15
        vaesenclast xmm5, xmm5, xmm15
        vaesenclast xmm6, xmm6, xmm15
        vaesenclast xmm7, xmm7, xmm15
        vaesenclast xmm8, xmm8, xmm15

        ; XOR the keystream with the input.
        vpxor       xmm1, xmm1, XMMWORD[rdi]
        vpxor       xmm2, xmm2, XMMWORD[rdi+16]
        vpxor       xmm3, xmm3, XMMWORD[rdi+32]
        vpxor       xmm4, xmm4, XMMWORD[rdi+48]
        vpxor       xmm5, xmm5, XMMWORD[rdi+64]
        vpxor       xmm6, xmm6, XMMWORD[rdi+80]
        vpxor       xmm7, xmm7, XMMWORD[rdi+96]
        vpxor       xmm8, xmm8, XMMWORD[rdi+112]

        dec         r8                              ; sets ZF for the jne below; the
                                                    ; stores in between leave flags alone
        vmovdqu     XMMWORD[rsi], xmm1
        vmovdqu     XMMWORD[rsi+16], xmm2
        vmovdqu     XMMWORD[rsi+32], xmm3
        vmovdqu     XMMWORD[rsi+48], xmm4
        vmovdqu     XMMWORD[rsi+64], xmm5
        vmovdqu     XMMWORD[rsi+80], xmm6
        vmovdqu     XMMWORD[rsi+96], xmm7
        vmovdqu     XMMWORD[rsi+112], xmm8

        jne         NEAR $L$128_enc_msg_x8_loop1

        ; Step past the last group that was processed.
        add         rsi, 128
        add         rdi, 128

$L$128_enc_msg_x8_check_remainder:
        cmp         r10, 0
        je          NEAR $L$128_enc_msg_x8_out

$L$128_enc_msg_x8_loop2:
        ; One block at a time; xmm0 holds the next counter value.
        vmovdqa     xmm1, xmm0
        vpaddd      xmm0, xmm0, XMMWORD[one]

        vpxor       xmm1, xmm1, XMMWORD[rcx]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+16]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+32]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+48]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+64]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+80]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+96]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+112]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+128]
        vaesenc     xmm1, xmm1, XMMWORD[rcx+144]
        vaesenclast xmm1, xmm1, XMMWORD[rcx+160]

        vpxor       xmm1, xmm1, XMMWORD[rdi]

        vmovdqu     XMMWORD[rsi], xmm1

        add         rdi, 16
        add         rsi, 16

        dec         r10
        jne         NEAR $L$128_enc_msg_x8_loop2

$L$128_enc_msg_x8_out:
        mov         rsp, rbp                        ; drop the aligned scratch area
        pop         rbp
        pop         r13
        pop         r12
        mov         rdi, QWORD[rsp+8]               ;WIN64 epilogue
        mov         rsi, QWORD[rsp+16]
        ret

$L$SEH_end_aes128gcmsiv_enc_msg_x8:
%unmacro ENC_MSG_X8_ROUND 1
|
|
global aes128gcmsiv_dec
|
|
|
|
ALIGN 16
|
|
aes128gcmsiv_dec:
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aes128gcmsiv_dec:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
mov rcx,r9
|
|
mov r8,QWORD[40+rsp]
|
|
mov r9,QWORD[48+rsp]
|
|
|
|
|
|
|
|
_CET_ENDBR
|
|
test r9,~15
|
|
jnz NEAR $L$128_dec_start
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$128_dec_start:
|
|
vzeroupper
|
|
vmovdqa xmm0,XMMWORD[rdx]
|
|
|
|
|
|
vmovdqu xmm15,XMMWORD[16+rdx]
|
|
vpor xmm15,xmm15,XMMWORD[OR_MASK]
|
|
mov rax,rdx
|
|
|
|
lea rax,[32+rax]
|
|
lea rcx,[32+rcx]
|
|
|
|
and r9,~15
|
|
|
|
|
|
cmp r9,96
|
|
jb NEAR $L$128_dec_loop2
|
|
|
|
|
|
sub r9,96
|
|
vmovdqa xmm7,xmm15
|
|
vpaddd xmm8,xmm7,XMMWORD[one]
|
|
vpaddd xmm9,xmm7,XMMWORD[two]
|
|
vpaddd xmm10,xmm9,XMMWORD[one]
|
|
vpaddd xmm11,xmm9,XMMWORD[two]
|
|
vpaddd xmm12,xmm11,XMMWORD[one]
|
|
vpaddd xmm15,xmm11,XMMWORD[two]
|
|
|
|
vpxor xmm7,xmm7,XMMWORD[r8]
|
|
vpxor xmm8,xmm8,XMMWORD[r8]
|
|
vpxor xmm9,xmm9,XMMWORD[r8]
|
|
vpxor xmm10,xmm10,XMMWORD[r8]
|
|
vpxor xmm11,xmm11,XMMWORD[r8]
|
|
vpxor xmm12,xmm12,XMMWORD[r8]
|
|
|
|
vmovdqu xmm4,XMMWORD[16+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[32+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[48+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[64+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[80+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[96+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[112+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[128+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[144+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[160+r8]
|
|
vaesenclast xmm7,xmm7,xmm4
|
|
vaesenclast xmm8,xmm8,xmm4
|
|
vaesenclast xmm9,xmm9,xmm4
|
|
vaesenclast xmm10,xmm10,xmm4
|
|
vaesenclast xmm11,xmm11,xmm4
|
|
vaesenclast xmm12,xmm12,xmm4
|
|
|
|
|
|
vpxor xmm7,xmm7,XMMWORD[rdi]
|
|
vpxor xmm8,xmm8,XMMWORD[16+rdi]
|
|
vpxor xmm9,xmm9,XMMWORD[32+rdi]
|
|
vpxor xmm10,xmm10,XMMWORD[48+rdi]
|
|
vpxor xmm11,xmm11,XMMWORD[64+rdi]
|
|
vpxor xmm12,xmm12,XMMWORD[80+rdi]
|
|
|
|
vmovdqu XMMWORD[rsi],xmm7
|
|
vmovdqu XMMWORD[16+rsi],xmm8
|
|
vmovdqu XMMWORD[32+rsi],xmm9
|
|
vmovdqu XMMWORD[48+rsi],xmm10
|
|
vmovdqu XMMWORD[64+rsi],xmm11
|
|
vmovdqu XMMWORD[80+rsi],xmm12
|
|
|
|
add rdi,96
|
|
add rsi,96
|
|
jmp NEAR $L$128_dec_loop1
|
|
|
|
|
|
ALIGN 64
|
|
$L$128_dec_loop1:
|
|
cmp r9,96
|
|
jb NEAR $L$128_dec_finish_96
|
|
sub r9,96
|
|
|
|
vmovdqa xmm6,xmm12
|
|
vmovdqa XMMWORD[(16-32)+rax],xmm11
|
|
vmovdqa XMMWORD[(32-32)+rax],xmm10
|
|
vmovdqa XMMWORD[(48-32)+rax],xmm9
|
|
vmovdqa XMMWORD[(64-32)+rax],xmm8
|
|
vmovdqa XMMWORD[(80-32)+rax],xmm7
|
|
|
|
vmovdqa xmm7,xmm15
|
|
vpaddd xmm8,xmm7,XMMWORD[one]
|
|
vpaddd xmm9,xmm7,XMMWORD[two]
|
|
vpaddd xmm10,xmm9,XMMWORD[one]
|
|
vpaddd xmm11,xmm9,XMMWORD[two]
|
|
vpaddd xmm12,xmm11,XMMWORD[one]
|
|
vpaddd xmm15,xmm11,XMMWORD[two]
|
|
|
|
vmovdqa xmm4,XMMWORD[r8]
|
|
vpxor xmm7,xmm7,xmm4
|
|
vpxor xmm8,xmm8,xmm4
|
|
vpxor xmm9,xmm9,xmm4
|
|
vpxor xmm10,xmm10,xmm4
|
|
vpxor xmm11,xmm11,xmm4
|
|
vpxor xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
|
vpclmulqdq xmm2,xmm6,xmm4,0x11
|
|
vpclmulqdq xmm3,xmm6,xmm4,0x00
|
|
vpclmulqdq xmm1,xmm6,xmm4,0x01
|
|
vpclmulqdq xmm4,xmm6,xmm4,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[16+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[((-16))+rax]
|
|
vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[32+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[rax]
|
|
vmovdqu xmm13,XMMWORD[rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[48+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[16+rax]
|
|
vmovdqu xmm13,XMMWORD[16+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[64+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[32+rax]
|
|
vmovdqu xmm13,XMMWORD[32+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[80+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[96+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[112+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
|
|
vmovdqa xmm6,XMMWORD[((80-32))+rax]
|
|
vpxor xmm6,xmm6,xmm0
|
|
vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[128+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
|
|
vpsrldq xmm4,xmm1,8
|
|
vpxor xmm5,xmm2,xmm4
|
|
vpslldq xmm4,xmm1,8
|
|
vpxor xmm0,xmm3,xmm4
|
|
|
|
vmovdqa xmm3,XMMWORD[poly]
|
|
|
|
vmovdqu xmm4,XMMWORD[144+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[160+r8]
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vpxor xmm4,xmm6,XMMWORD[rdi]
|
|
vaesenclast xmm7,xmm7,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[16+rdi]
|
|
vaesenclast xmm8,xmm8,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[32+rdi]
|
|
vaesenclast xmm9,xmm9,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[48+rdi]
|
|
vaesenclast xmm10,xmm10,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[64+rdi]
|
|
vaesenclast xmm11,xmm11,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[80+rdi]
|
|
vaesenclast xmm12,xmm12,xmm4
|
|
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vmovdqu XMMWORD[rsi],xmm7
|
|
vmovdqu XMMWORD[16+rsi],xmm8
|
|
vmovdqu XMMWORD[32+rsi],xmm9
|
|
vmovdqu XMMWORD[48+rsi],xmm10
|
|
vmovdqu XMMWORD[64+rsi],xmm11
|
|
vmovdqu XMMWORD[80+rsi],xmm12
|
|
|
|
vpxor xmm0,xmm0,xmm5
|
|
|
|
lea rdi,[96+rdi]
|
|
lea rsi,[96+rsi]
|
|
jmp NEAR $L$128_dec_loop1
|
|
|
|
$L$128_dec_finish_96:
|
|
vmovdqa xmm6,xmm12
|
|
vmovdqa XMMWORD[(16-32)+rax],xmm11
|
|
vmovdqa XMMWORD[(32-32)+rax],xmm10
|
|
vmovdqa XMMWORD[(48-32)+rax],xmm9
|
|
vmovdqa XMMWORD[(64-32)+rax],xmm8
|
|
vmovdqa XMMWORD[(80-32)+rax],xmm7
|
|
|
|
vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
|
vpclmulqdq xmm1,xmm6,xmm4,0x10
|
|
vpclmulqdq xmm2,xmm6,xmm4,0x11
|
|
vpclmulqdq xmm3,xmm6,xmm4,0x00
|
|
vpclmulqdq xmm4,xmm6,xmm4,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[((-16))+rax]
|
|
vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[rax]
|
|
vmovdqu xmm13,XMMWORD[rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[16+rax]
|
|
vmovdqu xmm13,XMMWORD[16+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[32+rax]
|
|
vmovdqu xmm13,XMMWORD[32+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm6,XMMWORD[((80-32))+rax]
|
|
vpxor xmm6,xmm6,xmm0
|
|
vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vpsrldq xmm4,xmm1,8
|
|
vpxor xmm5,xmm2,xmm4
|
|
vpslldq xmm4,xmm1,8
|
|
vpxor xmm0,xmm3,xmm4
|
|
|
|
vmovdqa xmm3,XMMWORD[poly]
|
|
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vpxor xmm0,xmm0,xmm5
|
|
|
|
$L$128_dec_loop2:
|
|
|
|
|
|
|
|
cmp r9,16
|
|
jb NEAR $L$128_dec_out
|
|
sub r9,16
|
|
|
|
vmovdqa xmm2,xmm15
|
|
vpaddd xmm15,xmm15,XMMWORD[one]
|
|
|
|
vpxor xmm2,xmm2,XMMWORD[r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[16+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[32+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[48+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[64+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[80+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[96+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[112+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[128+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[144+r8]
|
|
vaesenclast xmm2,xmm2,XMMWORD[160+r8]
|
|
vpxor xmm2,xmm2,XMMWORD[rdi]
|
|
vmovdqu XMMWORD[rsi],xmm2
|
|
add rdi,16
|
|
add rsi,16
|
|
|
|
vpxor xmm0,xmm0,xmm2
|
|
vmovdqa xmm1,XMMWORD[((-32))+rcx]
|
|
call GFMUL
|
|
|
|
jmp NEAR $L$128_dec_loop2
|
|
|
|
$L$128_dec_out:
|
|
vmovdqu XMMWORD[rdx],xmm0
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$SEH_end_aes128gcmsiv_dec:
|
|
global	aes128gcmsiv_ecb_enc_block

ALIGN	16
; ---------------------------------------------------------------------------
; aes128gcmsiv_ecb_enc_block
; Encrypts a single 16-byte block with an already expanded AES-128 key
; (round keys at offsets 0..160 of the key pointer).
;
; ABI:  Win64 entry. The three arguments are moved into rdi/rsi/rdx, which
;       are the registers the body works with; the caller's rdi and rsi are
;       parked in the home space at [rsp+8] / [rsp+16] and restored on exit.
; In:   arg1 (rcx -> rdi) = input block, loaded with vmovdqa (16-byte aligned)
;       arg2 (rdx -> rsi) = output block, stored with vmovdqa (16-byte aligned)
;       arg3 (r8  -> rdx) = round keys
; Uses: rax, rdx, xmm1
; ---------------------------------------------------------------------------
aes128gcmsiv_ecb_enc_block:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes128gcmsiv_ecb_enc_block:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

_CET_ENDBR
	; Initial whitening: state = block XOR round key 0.
	vmovdqa	xmm1,XMMWORD[rdi]
	vpxor	xmm1,xmm1,XMMWORD[rdx]

	; Rounds 1..9: one vaesenc per round key at offsets 16, 32, ..., 144.
%assign	ECB128_RK_OFF	16
%rep	9
	vaesenc	xmm1,xmm1,XMMWORD[ECB128_RK_OFF+rdx]
%assign	ECB128_RK_OFF	ECB128_RK_OFF+16
%endrep
	; Final round uses the key at offset 160 (ECB128_RK_OFF is 160 here).
	vaesenclast	xmm1,xmm1,XMMWORD[ECB128_RK_OFF+rdx]
%undef	ECB128_RK_OFF

	vmovdqa	XMMWORD[rsi],xmm1

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_aes128gcmsiv_ecb_enc_block:
|
|
global aes256gcmsiv_aes_ks_enc_x1
|
|
|
|
ALIGN 16
|
|
; aes256gcmsiv_aes_ks_enc_x1
; Expands a 32-byte AES-256 key into 15 round keys and, in the same pass,
; encrypts one 16-byte block with those keys.
; Win64 entry; the arguments are moved into the registers the body uses:
;   arg1 (rcx -> rdi): input block, loaded with vmovdqa (16-byte aligned)
;   arg2 (rdx -> rsi): output block, stored with vmovdqa (16-byte aligned)
;   arg3 (r8  -> rdx): destination for the round keys, offsets 0..224 (240 bytes)
;   arg4 (r9  -> rcx): 32-byte key, loaded with vmovdqa (16-byte aligned)
; Register roles: xmm1 / xmm3 = latest even / odd round key, xmm0 = round
; constant, xmm15 = byte-shuffle mask, xmm14 = zero, xmm8 = block state,
; xmm2 / xmm4 = scratch.
; NOTE(review): xmm8, xmm14 and xmm15 are overwritten without being saved,
; while the Microsoft x64 convention treats xmm6-xmm15 as callee-saved -
; confirm no Win64 caller depends on them.
aes256gcmsiv_aes_ks_enc_x1:
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
mov rcx,r9
|
|
|
|
|
|
|
|
|
|
_CET_ENDBR
|
|
; xmm0 = round constant (starts at 1 in every dword), xmm15 = shuffle mask.
vmovdqa xmm0,XMMWORD[con1]
|
|
vmovdqa xmm15,XMMWORD[mask]
|
|
; xmm8 = block to encrypt; xmm1 / xmm3 = the two halves of the raw key,
; which are round keys 0 and 1.
vmovdqa xmm8,XMMWORD[rdi]
|
|
vmovdqa xmm1,XMMWORD[rcx]
|
|
vmovdqa xmm3,XMMWORD[16+rcx]
|
|
; Whitening with round key 0, then round 1 with round key 1.
vpxor xmm8,xmm8,xmm1
|
|
vaesenc xmm8,xmm8,xmm3
|
|
vmovdqu XMMWORD[rdx],xmm1
|
|
vmovdqu XMMWORD[16+rdx],xmm3
|
|
; xmm14 = 0, used as an all-zero round key for the odd-key steps below.
vpxor xmm14,xmm14,xmm14
|
|
|
|
; Round key 2 (even step): vpshufb broadcasts the rotated last word of
; xmm3, vaesenclast substitutes its bytes and XORs in the round constant,
; vpslld doubles the constant for the next even step. The three
; shift-by-4-bytes / XOR pairs turn xmm1 into the running XOR of its words
; before the substituted word is folded in.
vpshufb xmm2,xmm3,xmm15
|
|
vaesenclast xmm2,xmm2,xmm0
|
|
vpslld xmm0,xmm0,1
|
|
vpslldq xmm4,xmm1,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpxor xmm1,xmm1,xmm2
|
|
; Use the new key for the next encryption round, then publish it.
vaesenc xmm8,xmm8,xmm1
|
|
vmovdqu XMMWORD[32+rdx],xmm1
|
|
|
|
; Round key 3 (odd step): vpshufd broadcasts the last word of xmm1 without
; rotating it, vaesenclast with the zero key substitutes its bytes only (no
; round constant), and the result is folded into the running XOR of xmm3.
vpshufd xmm2,xmm1,0xff
|
|
vaesenclast xmm2,xmm2,xmm14
|
|
vpslldq xmm4,xmm3,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpxor xmm3,xmm3,xmm2
|
|
vaesenc xmm8,xmm8,xmm3
|
|
vmovdqu XMMWORD[48+rdx],xmm3
|
|
|
|
; Round key 4 (even step, same sequence as round key 2).
vpshufb xmm2,xmm3,xmm15
|
|
vaesenclast xmm2,xmm2,xmm0
|
|
vpslld xmm0,xmm0,1
|
|
vpslldq xmm4,xmm1,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpxor xmm1,xmm1,xmm2
|
|
vaesenc xmm8,xmm8,xmm1
|
|
vmovdqu XMMWORD[64+rdx],xmm1
|
|
|
|
; Round key 5 (odd step, same sequence as round key 3).
vpshufd xmm2,xmm1,0xff
|
|
vaesenclast xmm2,xmm2,xmm14
|
|
vpslldq xmm4,xmm3,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpxor xmm3,xmm3,xmm2
|
|
vaesenc xmm8,xmm8,xmm3
|
|
vmovdqu XMMWORD[80+rdx],xmm3
|
|
|
|
; Round key 6 (even step).
vpshufb xmm2,xmm3,xmm15
|
|
vaesenclast xmm2,xmm2,xmm0
|
|
vpslld xmm0,xmm0,1
|
|
vpslldq xmm4,xmm1,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpxor xmm1,xmm1,xmm2
|
|
vaesenc xmm8,xmm8,xmm1
|
|
vmovdqu XMMWORD[96+rdx],xmm1
|
|
|
|
; Round key 7 (odd step).
vpshufd xmm2,xmm1,0xff
|
|
vaesenclast xmm2,xmm2,xmm14
|
|
vpslldq xmm4,xmm3,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpxor xmm3,xmm3,xmm2
|
|
vaesenc xmm8,xmm8,xmm3
|
|
vmovdqu XMMWORD[112+rdx],xmm3
|
|
|
|
; Round key 8 (even step).
vpshufb xmm2,xmm3,xmm15
|
|
vaesenclast xmm2,xmm2,xmm0
|
|
vpslld xmm0,xmm0,1
|
|
vpslldq xmm4,xmm1,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpxor xmm1,xmm1,xmm2
|
|
vaesenc xmm8,xmm8,xmm1
|
|
vmovdqu XMMWORD[128+rdx],xmm1
|
|
|
|
; Round key 9 (odd step).
vpshufd xmm2,xmm1,0xff
|
|
vaesenclast xmm2,xmm2,xmm14
|
|
vpslldq xmm4,xmm3,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpxor xmm3,xmm3,xmm2
|
|
vaesenc xmm8,xmm8,xmm3
|
|
vmovdqu XMMWORD[144+rdx],xmm3
|
|
|
|
; Round key 10 (even step).
vpshufb xmm2,xmm3,xmm15
|
|
vaesenclast xmm2,xmm2,xmm0
|
|
vpslld xmm0,xmm0,1
|
|
vpslldq xmm4,xmm1,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpxor xmm1,xmm1,xmm2
|
|
vaesenc xmm8,xmm8,xmm1
|
|
vmovdqu XMMWORD[160+rdx],xmm1
|
|
|
|
; Round key 11 (odd step).
vpshufd xmm2,xmm1,0xff
|
|
vaesenclast xmm2,xmm2,xmm14
|
|
vpslldq xmm4,xmm3,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpxor xmm3,xmm3,xmm2
|
|
vaesenc xmm8,xmm8,xmm3
|
|
vmovdqu XMMWORD[176+rdx],xmm3
|
|
|
|
; Round key 12 (even step).
vpshufb xmm2,xmm3,xmm15
|
|
vaesenclast xmm2,xmm2,xmm0
|
|
vpslld xmm0,xmm0,1
|
|
vpslldq xmm4,xmm1,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpxor xmm1,xmm1,xmm2
|
|
vaesenc xmm8,xmm8,xmm1
|
|
vmovdqu XMMWORD[192+rdx],xmm1
|
|
|
|
; Round key 13 (odd step).
vpshufd xmm2,xmm1,0xff
|
|
vaesenclast xmm2,xmm2,xmm14
|
|
vpslldq xmm4,xmm3,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpxor xmm3,xmm3,xmm2
|
|
vaesenc xmm8,xmm8,xmm3
|
|
vmovdqu XMMWORD[208+rdx],xmm3
|
|
|
|
; Round key 14 (last even step): the round constant is not doubled again
; because no further key is derived, and the block gets its final round
; (vaesenclast) instead of vaesenc.
vpshufb xmm2,xmm3,xmm15
|
|
vaesenclast xmm2,xmm2,xmm0
|
|
vpslldq xmm4,xmm1,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpslldq xmm4,xmm4,4
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpxor xmm1,xmm1,xmm2
|
|
vaesenclast xmm8,xmm8,xmm1
|
|
vmovdqu XMMWORD[224+rdx],xmm1
|
|
|
|
; Write the encrypted block to the output pointer.
vmovdqa XMMWORD[rsi],xmm8
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$SEH_end_aes256gcmsiv_aes_ks_enc_x1:
|
|
global	aes256gcmsiv_ecb_enc_block

ALIGN	16
; ---------------------------------------------------------------------------
; aes256gcmsiv_ecb_enc_block
; Encrypts a single 16-byte block with an already expanded AES-256 key
; (round keys at offsets 0..224 of the key pointer).
;
; ABI:  Win64 entry. The three arguments are moved into rdi/rsi/rdx, which
;       are the registers the body works with; the caller's rdi and rsi are
;       parked in the home space at [rsp+8] / [rsp+16] and restored on exit.
; In:   arg1 (rcx -> rdi) = input block, loaded with vmovdqa (16-byte aligned)
;       arg2 (rdx -> rsi) = output block, stored with vmovdqa (16-byte aligned)
;       arg3 (r8  -> rdx) = round keys
; Uses: rax, rdx, xmm1
; ---------------------------------------------------------------------------
aes256gcmsiv_ecb_enc_block:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_aes256gcmsiv_ecb_enc_block:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8

_CET_ENDBR
	; Initial whitening: state = block XOR round key 0.
	vmovdqa	xmm1,XMMWORD[rdi]
	vpxor	xmm1,xmm1,XMMWORD[rdx]

	; Rounds 1..13: one vaesenc per round key at offsets 16, 32, ..., 208.
%assign	ECB256_RK_OFF	16
%rep	13
	vaesenc	xmm1,xmm1,XMMWORD[ECB256_RK_OFF+rdx]
%assign	ECB256_RK_OFF	ECB256_RK_OFF+16
%endrep
	; Final round uses the key at offset 224 (ECB256_RK_OFF is 224 here).
	vaesenclast	xmm1,xmm1,XMMWORD[ECB256_RK_OFF+rdx]
%undef	ECB256_RK_OFF

	vmovdqa	XMMWORD[rsi],xmm1

	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	ret

$L$SEH_end_aes256gcmsiv_ecb_enc_block:
|
|
global aes256gcmsiv_enc_msg_x4
|
|
|
|
ALIGN 16
|
|
; aes256gcmsiv_enc_msg_x4
; Counter-mode encryption with 15 AES round keys: each counter block is
; encrypted and XORed with 16 bytes of input. The main loop handles four
; blocks per pass, the tail loop one block per pass.
; Win64 entry; the arguments are moved into the registers the body uses:
;   arg1 (rcx -> rdi): input data
;   arg2 (rdx -> rsi): output data
;   arg3 (r8  -> rdx): 16-byte initial counter block, loaded with vmovdqa
;                      (16-byte aligned)
;   arg4 (r9  -> rcx): round keys, offsets 0..224
;   arg5 ([40+rsp] -> r8): length in bytes; zero returns immediately
; The length is rounded UP to whole 16-byte blocks, so a trailing partial
; block is read and written as a full 16 bytes.
; Register roles: xmm0..xmm3 = next four counters, xmm4 = constant 4,
; xmm5..xmm8 = blocks in flight, xmm12 = current round key,
; r8 = remaining 4-block groups, r10 = remaining single blocks.
; NOTE(review): xmm6-xmm8, xmm12 and xmm15 are overwritten without being
; saved, while the Microsoft x64 convention treats xmm6-xmm15 as
; callee-saved - confirm no Win64 caller depends on them.
aes256gcmsiv_enc_msg_x4:
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aes256gcmsiv_enc_msg_x4:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
mov rcx,r9
|
|
mov r8,QWORD[40+rsp]
|
|
|
|
|
|
|
|
|
|
_CET_ENDBR
|
|
; Nothing to do for a zero length.
test r8,r8
|
|
jnz NEAR $L$256_enc_msg_x4_start
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$256_enc_msg_x4_start:
|
|
; r8 = length / 16; the shift by 60 leaves only the low four length bits,
; so a non-zero result means a partial block and the count is bumped by one.
mov r10,r8
|
|
shr r8,4
|
|
shl r10,60
|
|
jz NEAR $L$256_enc_msg_x4_start2
|
|
add r8,1
|
|
|
|
$L$256_enc_msg_x4_start2:
|
|
; r10 = block count mod 4 (blocks left for the single-block tail loop).
mov r10,r8
|
|
shl r10,62
|
|
shr r10,62
|
|
|
|
|
; Initial counter = block at arg3 with the top bit of its last dword forced
; on (OR_MASK).
vmovdqa xmm15,XMMWORD[rdx]
|
|
vpor xmm15,xmm15,XMMWORD[OR_MASK]
|
|
|
|
; xmm0..xmm3 = counter + 0..3. vpaddd with these constants only changes
; the lowest dword of each counter.
vmovdqa xmm4,XMMWORD[four]
|
|
vmovdqa xmm0,xmm15
|
|
vpaddd xmm1,xmm15,XMMWORD[one]
|
|
vpaddd xmm2,xmm15,XMMWORD[two]
|
|
vpaddd xmm3,xmm15,XMMWORD[three]
|
|
|
|
; r8 = number of full 4-block groups; skip the main loop if there are none.
shr r8,2
|
|
je NEAR $L$256_enc_msg_x4_check_remainder
|
|
|
|
; Pre-bias the pointers: the loop advances them by 64 at its top.
sub rsi,64
|
|
sub rdi,64
|
|
|
|
$L$256_enc_msg_x4_loop1:
|
|
add rsi,64
|
|
add rdi,64
|
|
|
|
; Working copies of the four counters for this pass.
vmovdqa xmm5,xmm0
|
|
vmovdqa xmm6,xmm1
|
|
vmovdqa xmm7,xmm2
|
|
vmovdqa xmm8,xmm3
|
|
|
|
; Whitening with round key 0.
vpxor xmm5,xmm5,XMMWORD[rcx]
|
|
vpxor xmm6,xmm6,XMMWORD[rcx]
|
|
vpxor xmm7,xmm7,XMMWORD[rcx]
|
|
vpxor xmm8,xmm8,XMMWORD[rcx]
|
|
|
|
; Rounds 1..13. The counter increments for the NEXT pass (xmm0..xmm3 += 4)
; are slotted in between the first few rounds.
vmovdqu xmm12,XMMWORD[16+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vpaddd xmm0,xmm0,xmm4
|
|
vmovdqu xmm12,XMMWORD[32+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vpaddd xmm1,xmm1,xmm4
|
|
vmovdqu xmm12,XMMWORD[48+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vpaddd xmm2,xmm2,xmm4
|
|
vmovdqu xmm12,XMMWORD[64+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vpaddd xmm3,xmm3,xmm4
|
|
|
|
vmovdqu xmm12,XMMWORD[80+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[96+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[112+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[128+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[144+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[160+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[176+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[192+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
vmovdqu xmm12,XMMWORD[208+rcx]
|
|
vaesenc xmm5,xmm5,xmm12
|
|
vaesenc xmm6,xmm6,xmm12
|
|
vaesenc xmm7,xmm7,xmm12
|
|
vaesenc xmm8,xmm8,xmm12
|
|
|
|
; Final round with the key at offset 224.
vmovdqu xmm12,XMMWORD[224+rcx]
|
|
vaesenclast xmm5,xmm5,xmm12
|
|
vaesenclast xmm6,xmm6,xmm12
|
|
vaesenclast xmm7,xmm7,xmm12
|
|
vaesenclast xmm8,xmm8,xmm12
|
|
|
|
|
|
|
|
; XOR the four keystream blocks with 64 bytes of input.
vpxor xmm5,xmm5,XMMWORD[rdi]
|
|
vpxor xmm6,xmm6,XMMWORD[16+rdi]
|
|
vpxor xmm7,xmm7,XMMWORD[32+rdi]
|
|
vpxor xmm8,xmm8,XMMWORD[48+rdi]
|
|
|
|
; Group counter. The flags set here are consumed by the jne below; the
; vmovdqu stores in between do not modify flags.
sub r8,1
|
|
|
|
vmovdqu XMMWORD[rsi],xmm5
|
|
vmovdqu XMMWORD[16+rsi],xmm6
|
|
vmovdqu XMMWORD[32+rsi],xmm7
|
|
vmovdqu XMMWORD[48+rsi],xmm8
|
|
|
|
jne NEAR $L$256_enc_msg_x4_loop1
|
|
|
|
; Step past the last group (the in-loop advance happens at the loop top).
add rsi,64
|
|
add rdi,64
|
|
|
|
$L$256_enc_msg_x4_check_remainder:
|
|
cmp r10,0
|
|
je NEAR $L$256_enc_msg_x4_out
|
|
|
|
; Tail: one block per pass, r10 passes. xmm0 holds the next counter.
$L$256_enc_msg_x4_loop2:
|
|
|
|
|
|
|
|
vmovdqa xmm5,xmm0
|
|
vpaddd xmm0,xmm0,XMMWORD[one]
|
|
vpxor xmm5,xmm5,XMMWORD[rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[16+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[32+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[48+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[64+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[80+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[96+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[112+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[128+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[144+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[160+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[176+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[192+rcx]
|
|
vaesenc xmm5,xmm5,XMMWORD[208+rcx]
|
|
vaesenclast xmm5,xmm5,XMMWORD[224+rcx]
|
|
|
|
|
; XOR with the input block and store the result.
vpxor xmm5,xmm5,XMMWORD[rdi]
|
|
|
|
vmovdqu XMMWORD[rsi],xmm5
|
|
|
|
add rdi,16
|
|
add rsi,16
|
|
|
|
sub r10,1
|
|
jne NEAR $L$256_enc_msg_x4_loop2
|
|
|
|
$L$256_enc_msg_x4_out:
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$SEH_end_aes256gcmsiv_enc_msg_x4:
|
|
global aes256gcmsiv_enc_msg_x8
|
|
|
|
ALIGN 16
|
|
; aes256gcmsiv_enc_msg_x8
; Counter-mode encryption with 15 AES round keys: each counter block is
; encrypted and XORed with 16 bytes of input. The main loop handles eight
; blocks per pass, the tail loop one block per pass.
; Win64 entry; the arguments are moved into the registers the body uses:
;   arg1 (rcx -> rdi): input data
;   arg2 (rdx -> rsi): output data
;   arg3 (r8  -> rdx): 16-byte initial counter block, loaded with vmovdqa
;                      (16-byte aligned)
;   arg4 (r9  -> rcx): round keys, offsets 0..224
;   arg5 ([40+rsp] -> r8): length in bytes; zero returns immediately
; The length is rounded UP to whole 16-byte blocks, so a trailing partial
; block is read and written as a full 16 bytes.
; Register roles: xmm0, xmm9..xmm14 = counters +0..+6, [r11] = counter +7,
; xmm1..xmm8 = blocks in flight, xmm15 = current round key,
; r8 = remaining 8-block groups, r10 = remaining single blocks.
; NOTE(review): the eighth counter lives in a scratch slot BELOW rsp (rsp is
; never adjusted) and xmm6-xmm15 are overwritten without being saved. The
; Microsoft x64 convention has no red zone and treats xmm6-xmm15 as
; callee-saved - confirm this routine is never reached from Win64 callers
; before relying on it.
aes256gcmsiv_enc_msg_x8:
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aes256gcmsiv_enc_msg_x8:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
mov rcx,r9
|
|
mov r8,QWORD[40+rsp]
|
|
|
|
|
|
|
|
|
|
_CET_ENDBR
|
|
; Nothing to do for a zero length.
test r8,r8
|
|
jnz NEAR $L$256_enc_msg_x8_start
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$256_enc_msg_x8_start:
|
|
|
|
; r11 = scratch slot: at least 16 bytes below rsp, rounded down to a
; 64-byte boundary so the vmovdqa accesses to it are aligned.
mov r11,rsp
|
|
sub r11,16
|
|
and r11,-64
|
|
|
|
; r8 = length / 16; the shift by 60 leaves only the low four length bits,
; so a non-zero result means a partial block and the count is bumped by one.
mov r10,r8
|
|
shr r8,4
|
|
shl r10,60
|
|
jz NEAR $L$256_enc_msg_x8_start2
|
|
add r8,1
|
|
|
|
$L$256_enc_msg_x8_start2:
|
|
; r10 = block count mod 8 (blocks left for the single-block tail loop).
mov r10,r8
|
|
shl r10,61
|
|
shr r10,61
|
|
|
|
|
; Initial counter = block at arg3 with the top bit of its last dword forced
; on (OR_MASK).
vmovdqa xmm1,XMMWORD[rdx]
|
|
vpor xmm1,xmm1,XMMWORD[OR_MASK]
|
|
|
|
|
; counter+7 goes to the scratch slot (xmm0 is only a temporary here);
; counter+1..+6 go to xmm9..xmm14 and xmm0 finally holds counter+0.
vpaddd xmm0,xmm1,XMMWORD[seven]
|
|
vmovdqa XMMWORD[r11],xmm0
|
|
vpaddd xmm9,xmm1,XMMWORD[one]
|
|
vpaddd xmm10,xmm1,XMMWORD[two]
|
|
vpaddd xmm11,xmm1,XMMWORD[three]
|
|
vpaddd xmm12,xmm1,XMMWORD[four]
|
|
vpaddd xmm13,xmm1,XMMWORD[five]
|
|
vpaddd xmm14,xmm1,XMMWORD[six]
|
|
vmovdqa xmm0,xmm1
|
|
|
|
; r8 = number of full 8-block groups; skip the main loop if there are none.
shr r8,3
|
|
jz NEAR $L$256_enc_msg_x8_check_remainder
|
|
|
|
; Pre-bias the pointers: the loop advances them by 128 at its top.
sub rsi,128
|
|
sub rdi,128
|
|
|
|
$L$256_enc_msg_x8_loop1:
|
|
add rsi,128
|
|
add rdi,128
|
|
|
|
; Working copies of the eight counters for this pass.
vmovdqa xmm1,xmm0
|
|
vmovdqa xmm2,xmm9
|
|
vmovdqa xmm3,xmm10
|
|
vmovdqa xmm4,xmm11
|
|
vmovdqa xmm5,xmm12
|
|
vmovdqa xmm6,xmm13
|
|
vmovdqa xmm7,xmm14
|
|
|
|
vmovdqa xmm8,XMMWORD[r11]
|
|
|
|
; Whitening with round key 0.
vpxor xmm1,xmm1,XMMWORD[rcx]
|
|
vpxor xmm2,xmm2,XMMWORD[rcx]
|
|
vpxor xmm3,xmm3,XMMWORD[rcx]
|
|
vpxor xmm4,xmm4,XMMWORD[rcx]
|
|
vpxor xmm5,xmm5,XMMWORD[rcx]
|
|
vpxor xmm6,xmm6,XMMWORD[rcx]
|
|
vpxor xmm7,xmm7,XMMWORD[rcx]
|
|
vpxor xmm8,xmm8,XMMWORD[rcx]
|
|
|
|
; Rounds 1..13. The counter updates for the NEXT pass (+8 each) are
; slotted in between the first rounds.
vmovdqu xmm15,XMMWORD[16+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
; Advance the spilled counter (+7 slot) by 8, using xmm14 as a temporary;
; its old value was already copied into xmm7 above.
vmovdqa xmm14,XMMWORD[r11]
|
|
vpaddd xmm14,xmm14,XMMWORD[eight]
|
|
vmovdqa XMMWORD[r11],xmm14
|
|
vmovdqu xmm15,XMMWORD[32+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
; xmm14 = (new +7 counter) - 1, i.e. the +6 counter for the next pass.
vpsubd xmm14,xmm14,XMMWORD[one]
|
|
vmovdqu xmm15,XMMWORD[48+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vpaddd xmm0,xmm0,XMMWORD[eight]
|
|
vmovdqu xmm15,XMMWORD[64+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vpaddd xmm9,xmm9,XMMWORD[eight]
|
|
vmovdqu xmm15,XMMWORD[80+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vpaddd xmm10,xmm10,XMMWORD[eight]
|
|
vmovdqu xmm15,XMMWORD[96+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vpaddd xmm11,xmm11,XMMWORD[eight]
|
|
vmovdqu xmm15,XMMWORD[112+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vpaddd xmm12,xmm12,XMMWORD[eight]
|
|
vmovdqu xmm15,XMMWORD[128+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vpaddd xmm13,xmm13,XMMWORD[eight]
|
|
vmovdqu xmm15,XMMWORD[144+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vmovdqu xmm15,XMMWORD[160+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vmovdqu xmm15,XMMWORD[176+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vmovdqu xmm15,XMMWORD[192+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
vmovdqu xmm15,XMMWORD[208+rcx]
|
|
vaesenc xmm1,xmm1,xmm15
|
|
vaesenc xmm2,xmm2,xmm15
|
|
vaesenc xmm3,xmm3,xmm15
|
|
vaesenc xmm4,xmm4,xmm15
|
|
vaesenc xmm5,xmm5,xmm15
|
|
vaesenc xmm6,xmm6,xmm15
|
|
vaesenc xmm7,xmm7,xmm15
|
|
vaesenc xmm8,xmm8,xmm15
|
|
|
|
; Final round with the key at offset 224.
vmovdqu xmm15,XMMWORD[224+rcx]
|
|
vaesenclast xmm1,xmm1,xmm15
|
|
vaesenclast xmm2,xmm2,xmm15
|
|
vaesenclast xmm3,xmm3,xmm15
|
|
vaesenclast xmm4,xmm4,xmm15
|
|
vaesenclast xmm5,xmm5,xmm15
|
|
vaesenclast xmm6,xmm6,xmm15
|
|
vaesenclast xmm7,xmm7,xmm15
|
|
vaesenclast xmm8,xmm8,xmm15
|
|
|
|
|
|
|
|
; XOR the eight keystream blocks with 128 bytes of input.
vpxor xmm1,xmm1,XMMWORD[rdi]
|
|
vpxor xmm2,xmm2,XMMWORD[16+rdi]
|
|
vpxor xmm3,xmm3,XMMWORD[32+rdi]
|
|
vpxor xmm4,xmm4,XMMWORD[48+rdi]
|
|
vpxor xmm5,xmm5,XMMWORD[64+rdi]
|
|
vpxor xmm6,xmm6,XMMWORD[80+rdi]
|
|
vpxor xmm7,xmm7,XMMWORD[96+rdi]
|
|
vpxor xmm8,xmm8,XMMWORD[112+rdi]
|
|
|
|
; Group counter. The flags set here are consumed by the jne below; the
; vmovdqu stores in between do not modify flags.
sub r8,1
|
|
|
|
vmovdqu XMMWORD[rsi],xmm1
|
|
vmovdqu XMMWORD[16+rsi],xmm2
|
|
vmovdqu XMMWORD[32+rsi],xmm3
|
|
vmovdqu XMMWORD[48+rsi],xmm4
|
|
vmovdqu XMMWORD[64+rsi],xmm5
|
|
vmovdqu XMMWORD[80+rsi],xmm6
|
|
vmovdqu XMMWORD[96+rsi],xmm7
|
|
vmovdqu XMMWORD[112+rsi],xmm8
|
|
|
|
jne NEAR $L$256_enc_msg_x8_loop1
|
|
|
|
; Step past the last group (the in-loop advance happens at the loop top).
add rsi,128
|
|
add rdi,128
|
|
|
|
$L$256_enc_msg_x8_check_remainder:
|
|
cmp r10,0
|
|
je NEAR $L$256_enc_msg_x8_out
|
|
|
|
; Tail: one block per pass, r10 passes. xmm0 holds the next counter.
$L$256_enc_msg_x8_loop2:
|
|
|
|
|
vmovdqa xmm1,xmm0
|
|
vpaddd xmm0,xmm0,XMMWORD[one]
|
|
|
|
vpxor xmm1,xmm1,XMMWORD[rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[16+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[32+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[48+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[64+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[80+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[96+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[112+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[128+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[144+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[160+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[176+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[192+rcx]
|
|
vaesenc xmm1,xmm1,XMMWORD[208+rcx]
|
|
vaesenclast xmm1,xmm1,XMMWORD[224+rcx]
|
|
|
|
|
; XOR with the input block and store the result.
vpxor xmm1,xmm1,XMMWORD[rdi]
|
|
|
|
vmovdqu XMMWORD[rsi],xmm1
|
|
|
|
add rdi,16
|
|
add rsi,16
|
|
sub r10,1
|
|
jnz NEAR $L$256_enc_msg_x8_loop2
|
|
|
|
$L$256_enc_msg_x8_out:
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
|
$L$SEH_end_aes256gcmsiv_enc_msg_x8:
|
|
global aes256gcmsiv_dec
|
|
|
|
ALIGN 16
|
|
aes256gcmsiv_dec:
|
|
mov QWORD[8+rsp],rdi ;WIN64 prologue
|
|
mov QWORD[16+rsp],rsi
|
|
mov rax,rsp
|
|
$L$SEH_begin_aes256gcmsiv_dec:
|
|
mov rdi,rcx
|
|
mov rsi,rdx
|
|
mov rdx,r8
|
|
mov rcx,r9
|
|
mov r8,QWORD[40+rsp]
|
|
mov r9,QWORD[48+rsp]
|
|
|
|
|
|
|
|
_CET_ENDBR
|
|
test r9,~15
|
|
jnz NEAR $L$256_dec_start
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$256_dec_start:
|
|
vzeroupper
|
|
vmovdqa xmm0,XMMWORD[rdx]
|
|
|
|
|
|
vmovdqu xmm15,XMMWORD[16+rdx]
|
|
vpor xmm15,xmm15,XMMWORD[OR_MASK]
|
|
mov rax,rdx
|
|
|
|
lea rax,[32+rax]
|
|
lea rcx,[32+rcx]
|
|
|
|
and r9,~15
|
|
|
|
|
|
cmp r9,96
|
|
jb NEAR $L$256_dec_loop2
|
|
|
|
|
|
sub r9,96
|
|
vmovdqa xmm7,xmm15
|
|
vpaddd xmm8,xmm7,XMMWORD[one]
|
|
vpaddd xmm9,xmm7,XMMWORD[two]
|
|
vpaddd xmm10,xmm9,XMMWORD[one]
|
|
vpaddd xmm11,xmm9,XMMWORD[two]
|
|
vpaddd xmm12,xmm11,XMMWORD[one]
|
|
vpaddd xmm15,xmm11,XMMWORD[two]
|
|
|
|
vpxor xmm7,xmm7,XMMWORD[r8]
|
|
vpxor xmm8,xmm8,XMMWORD[r8]
|
|
vpxor xmm9,xmm9,XMMWORD[r8]
|
|
vpxor xmm10,xmm10,XMMWORD[r8]
|
|
vpxor xmm11,xmm11,XMMWORD[r8]
|
|
vpxor xmm12,xmm12,XMMWORD[r8]
|
|
|
|
vmovdqu xmm4,XMMWORD[16+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[32+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[48+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[64+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[80+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[96+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[112+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[128+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[144+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[160+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[176+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[192+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[208+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[224+r8]
|
|
vaesenclast xmm7,xmm7,xmm4
|
|
vaesenclast xmm8,xmm8,xmm4
|
|
vaesenclast xmm9,xmm9,xmm4
|
|
vaesenclast xmm10,xmm10,xmm4
|
|
vaesenclast xmm11,xmm11,xmm4
|
|
vaesenclast xmm12,xmm12,xmm4
|
|
|
|
|
|
vpxor xmm7,xmm7,XMMWORD[rdi]
|
|
vpxor xmm8,xmm8,XMMWORD[16+rdi]
|
|
vpxor xmm9,xmm9,XMMWORD[32+rdi]
|
|
vpxor xmm10,xmm10,XMMWORD[48+rdi]
|
|
vpxor xmm11,xmm11,XMMWORD[64+rdi]
|
|
vpxor xmm12,xmm12,XMMWORD[80+rdi]
|
|
|
|
vmovdqu XMMWORD[rsi],xmm7
|
|
vmovdqu XMMWORD[16+rsi],xmm8
|
|
vmovdqu XMMWORD[32+rsi],xmm9
|
|
vmovdqu XMMWORD[48+rsi],xmm10
|
|
vmovdqu XMMWORD[64+rsi],xmm11
|
|
vmovdqu XMMWORD[80+rsi],xmm12
|
|
|
|
add rdi,96
|
|
add rsi,96
|
|
jmp NEAR $L$256_dec_loop1
|
|
|
|
|
|
ALIGN 64
|
|
$L$256_dec_loop1:
|
|
cmp r9,96
|
|
jb NEAR $L$256_dec_finish_96
|
|
sub r9,96
|
|
|
|
vmovdqa xmm6,xmm12
|
|
vmovdqa XMMWORD[(16-32)+rax],xmm11
|
|
vmovdqa XMMWORD[(32-32)+rax],xmm10
|
|
vmovdqa XMMWORD[(48-32)+rax],xmm9
|
|
vmovdqa XMMWORD[(64-32)+rax],xmm8
|
|
vmovdqa XMMWORD[(80-32)+rax],xmm7
|
|
|
|
vmovdqa xmm7,xmm15
|
|
vpaddd xmm8,xmm7,XMMWORD[one]
|
|
vpaddd xmm9,xmm7,XMMWORD[two]
|
|
vpaddd xmm10,xmm9,XMMWORD[one]
|
|
vpaddd xmm11,xmm9,XMMWORD[two]
|
|
vpaddd xmm12,xmm11,XMMWORD[one]
|
|
vpaddd xmm15,xmm11,XMMWORD[two]
|
|
|
|
vmovdqa xmm4,XMMWORD[r8]
|
|
vpxor xmm7,xmm7,xmm4
|
|
vpxor xmm8,xmm8,xmm4
|
|
vpxor xmm9,xmm9,xmm4
|
|
vpxor xmm10,xmm10,xmm4
|
|
vpxor xmm11,xmm11,xmm4
|
|
vpxor xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
|
vpclmulqdq xmm2,xmm6,xmm4,0x11
|
|
vpclmulqdq xmm3,xmm6,xmm4,0x00
|
|
vpclmulqdq xmm1,xmm6,xmm4,0x01
|
|
vpclmulqdq xmm4,xmm6,xmm4,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[16+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[((-16))+rax]
|
|
vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[32+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[rax]
|
|
vmovdqu xmm13,XMMWORD[rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[48+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[16+rax]
|
|
vmovdqu xmm13,XMMWORD[16+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[64+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[32+rax]
|
|
vmovdqu xmm13,XMMWORD[32+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm4,XMMWORD[80+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[96+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[112+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
|
|
vmovdqa xmm6,XMMWORD[((80-32))+rax]
|
|
vpxor xmm6,xmm6,xmm0
|
|
vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[128+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
|
|
vpsrldq xmm4,xmm1,8
|
|
vpxor xmm5,xmm2,xmm4
|
|
vpslldq xmm4,xmm1,8
|
|
vpxor xmm0,xmm3,xmm4
|
|
|
|
vmovdqa xmm3,XMMWORD[poly]
|
|
|
|
vmovdqu xmm4,XMMWORD[144+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[160+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[176+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[192+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm4,XMMWORD[208+r8]
|
|
vaesenc xmm7,xmm7,xmm4
|
|
vaesenc xmm8,xmm8,xmm4
|
|
vaesenc xmm9,xmm9,xmm4
|
|
vaesenc xmm10,xmm10,xmm4
|
|
vaesenc xmm11,xmm11,xmm4
|
|
vaesenc xmm12,xmm12,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[224+r8]
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vpxor xmm4,xmm6,XMMWORD[rdi]
|
|
vaesenclast xmm7,xmm7,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[16+rdi]
|
|
vaesenclast xmm8,xmm8,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[32+rdi]
|
|
vaesenclast xmm9,xmm9,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[48+rdi]
|
|
vaesenclast xmm10,xmm10,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[64+rdi]
|
|
vaesenclast xmm11,xmm11,xmm4
|
|
vpxor xmm4,xmm6,XMMWORD[80+rdi]
|
|
vaesenclast xmm12,xmm12,xmm4
|
|
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vmovdqu XMMWORD[rsi],xmm7
|
|
vmovdqu XMMWORD[16+rsi],xmm8
|
|
vmovdqu XMMWORD[32+rsi],xmm9
|
|
vmovdqu XMMWORD[48+rsi],xmm10
|
|
vmovdqu XMMWORD[64+rsi],xmm11
|
|
vmovdqu XMMWORD[80+rsi],xmm12
|
|
|
|
vpxor xmm0,xmm0,xmm5
|
|
|
|
lea rdi,[96+rdi]
|
|
lea rsi,[96+rsi]
|
|
jmp NEAR $L$256_dec_loop1
|
|
|
|
$L$256_dec_finish_96:
|
|
vmovdqa xmm6,xmm12
|
|
vmovdqa XMMWORD[(16-32)+rax],xmm11
|
|
vmovdqa XMMWORD[(32-32)+rax],xmm10
|
|
vmovdqa XMMWORD[(48-32)+rax],xmm9
|
|
vmovdqa XMMWORD[(64-32)+rax],xmm8
|
|
vmovdqa XMMWORD[(80-32)+rax],xmm7
|
|
|
|
vmovdqu xmm4,XMMWORD[((0-32))+rcx]
|
|
vpclmulqdq xmm1,xmm6,xmm4,0x10
|
|
vpclmulqdq xmm2,xmm6,xmm4,0x11
|
|
vpclmulqdq xmm3,xmm6,xmm4,0x00
|
|
vpclmulqdq xmm4,xmm6,xmm4,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[((-16))+rax]
|
|
vmovdqu xmm13,XMMWORD[((-16))+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[rax]
|
|
vmovdqu xmm13,XMMWORD[rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[16+rax]
|
|
vmovdqu xmm13,XMMWORD[16+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vmovdqu xmm6,XMMWORD[32+rax]
|
|
vmovdqu xmm13,XMMWORD[32+rcx]
|
|
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm13,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
|
|
vmovdqu xmm6,XMMWORD[((80-32))+rax]
|
|
vpxor xmm6,xmm6,xmm0
|
|
vmovdqu xmm5,XMMWORD[((80-32))+rcx]
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x11
|
|
vpxor xmm2,xmm2,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x00
|
|
vpxor xmm3,xmm3,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x10
|
|
vpxor xmm1,xmm1,xmm4
|
|
vpclmulqdq xmm4,xmm6,xmm5,0x01
|
|
vpxor xmm1,xmm1,xmm4
|
|
|
|
vpsrldq xmm4,xmm1,8
|
|
vpxor xmm5,xmm2,xmm4
|
|
vpslldq xmm4,xmm1,8
|
|
vpxor xmm0,xmm3,xmm4
|
|
|
|
vmovdqa xmm3,XMMWORD[poly]
|
|
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vpalignr xmm2,xmm0,xmm0,8
|
|
vpclmulqdq xmm0,xmm0,xmm3,0x10
|
|
vpxor xmm0,xmm2,xmm0
|
|
|
|
vpxor xmm0,xmm0,xmm5
|
|
|
|
$L$256_dec_loop2:
|
|
|
|
|
|
|
|
cmp r9,16
|
|
jb NEAR $L$256_dec_out
|
|
sub r9,16
|
|
|
|
vmovdqa xmm2,xmm15
|
|
vpaddd xmm15,xmm15,XMMWORD[one]
|
|
|
|
vpxor xmm2,xmm2,XMMWORD[r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[16+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[32+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[48+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[64+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[80+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[96+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[112+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[128+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[144+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[160+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[176+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[192+r8]
|
|
vaesenc xmm2,xmm2,XMMWORD[208+r8]
|
|
vaesenclast xmm2,xmm2,XMMWORD[224+r8]
|
|
vpxor xmm2,xmm2,XMMWORD[rdi]
|
|
vmovdqu XMMWORD[rsi],xmm2
|
|
add rdi,16
|
|
add rsi,16
|
|
|
|
vpxor xmm0,xmm0,xmm2
|
|
vmovdqa xmm1,XMMWORD[((-32))+rcx]
|
|
call GFMUL
|
|
|
|
jmp NEAR $L$256_dec_loop2
|
|
|
|
$L$256_dec_out:
|
|
vmovdqu XMMWORD[rdx],xmm0
|
|
mov rdi,QWORD[8+rsp] ;WIN64 epilogue
|
|
mov rsi,QWORD[16+rsp]
|
|
ret
|
|
|
|
$L$SEH_end_aes256gcmsiv_dec:
|
|
;-----------------------------------------------------------------------
; aes256gcmsiv_kdf  (Win64 entry point)
;
; Builds six 16-byte blocks from the first argument, encrypts each one
; with the 15 round keys read from the third argument (one initial XOR,
; 13 vaesenc rounds, one vaesenclast) and stores the 96 bytes of result
; through the second argument.
; Presumably this is the AES-256-GCM-SIV per-nonce key derivation
; (cf. RFC 8452) -- confirm against the C caller.
;
; In (Microsoft x64 argument registers, remapped below to rdi/rsi/rdx):
;   rcx -> 16-byte input block; only its first three dwords end up in
;          the encrypted blocks. Read with vmovdqa, so the pointer must
;          be 16-byte aligned.
;   rdx -> 96-byte output buffer, written with vmovdqa (16-byte aligned).
;   r8  -> round keys at offsets 0..224 (240 bytes), read with vmovdqa
;          (16-byte aligned).
; Out:   no value is produced in a register on purpose; rax is left
;        holding the entry value of rsp.
; Clobbers: rax, rdx, xmm1, xmm2, xmm4, xmm6-xmm8, xmm11-xmm13.
;        rdi/rsi are restored before returning.
;
; NOTE(review): xmm6-xmm8 and xmm11-xmm13 are overwritten here without
; being saved or restored, while the Microsoft x64 convention treats
; xmm6-xmm15 as callee-saved. Confirm that this routine is never called
; from Windows C code, or add save/restore plus unwind data.
;-----------------------------------------------------------------------
global	aes256gcmsiv_kdf
|
|
|
|
ALIGN	16
|
|
aes256gcmsiv_kdf:
|
|
; rdi/rsi are callee-saved on Win64; park them in the two slots just
; above the return address (caller-provided shadow space).
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
|
|
	mov	QWORD[16+rsp],rsi
|
|
; rax = entry rsp; it is not read again inside this function.
	mov	rax,rsp
|
|
$L$SEH_begin_aes256gcmsiv_kdf:
|
|
; Move the Win64 arguments (rcx, rdx, r8) into the registers the body
; below uses (rdi, rsi, rdx).
	mov	rdi,rcx
|
|
	mov	rsi,rdx
|
|
	mov	rdx,r8
|
|
|
|
|
|
|
|
; _CET_ENDBR is %define'd to nothing at the top of this file, so no
; instruction is emitted here.
_CET_ENDBR
|
|
|
|
|
|
|
|
|
|
|
|
; xmm1 = round key 0, xmm4 = input block, xmm11 = and_mask (dword 0
; cleared, dwords 1-3 all ones), xmm8 = constant `one` (dword 0 = 1).
	vmovdqa	xmm1,XMMWORD[rdx]
|
|
	vmovdqa	xmm4,XMMWORD[rdi]
|
|
	vmovdqa	xmm11,XMMWORD[and_mask]
|
|
	vmovdqa	xmm8,XMMWORD[one]
|
|
; Shuffle 0x90 yields dwords (in0, in0, in1, in2); the mask then zeroes
; dword 0, giving block 0 = (0, in0, in1, in2).
	vpshufd	xmm4,xmm4,0x90
|
|
	vpand	xmm4,xmm4,xmm11
|
|
; Blocks 1..5: same upper three dwords, dword 0 incremented by one each
; time (xmm6=1, xmm7=2, xmm11=3, xmm12=4, xmm13=5).
	vpaddd	xmm6,xmm4,xmm8
|
|
	vpaddd	xmm7,xmm6,xmm8
|
|
	vpaddd	xmm11,xmm7,xmm8
|
|
	vpaddd	xmm12,xmm11,xmm8
|
|
	vpaddd	xmm13,xmm12,xmm8
|
|
|
|
; Initial AddRoundKey: XOR round key 0 into all six blocks.
	vpxor	xmm4,xmm4,xmm1
|
|
	vpxor	xmm6,xmm6,xmm1
|
|
	vpxor	xmm7,xmm7,xmm1
|
|
	vpxor	xmm11,xmm11,xmm1
|
|
	vpxor	xmm12,xmm12,xmm1
|
|
	vpxor	xmm13,xmm13,xmm1
|
|
|
|
; Rounds 1..13: each group loads the next round key (alternating between
; xmm1 and xmm2) and applies it to all six blocks.
; Round 1 (key at offset 16).
	vmovdqa	xmm1,XMMWORD[16+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm1
|
|
	vaesenc	xmm6,xmm6,xmm1
|
|
	vaesenc	xmm7,xmm7,xmm1
|
|
	vaesenc	xmm11,xmm11,xmm1
|
|
	vaesenc	xmm12,xmm12,xmm1
|
|
	vaesenc	xmm13,xmm13,xmm1
|
|
|
|
; Round 2 (key at offset 32).
	vmovdqa	xmm2,XMMWORD[32+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm2
|
|
	vaesenc	xmm6,xmm6,xmm2
|
|
	vaesenc	xmm7,xmm7,xmm2
|
|
	vaesenc	xmm11,xmm11,xmm2
|
|
	vaesenc	xmm12,xmm12,xmm2
|
|
	vaesenc	xmm13,xmm13,xmm2
|
|
|
|
; Round 3 (key at offset 48).
	vmovdqa	xmm1,XMMWORD[48+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm1
|
|
	vaesenc	xmm6,xmm6,xmm1
|
|
	vaesenc	xmm7,xmm7,xmm1
|
|
	vaesenc	xmm11,xmm11,xmm1
|
|
	vaesenc	xmm12,xmm12,xmm1
|
|
	vaesenc	xmm13,xmm13,xmm1
|
|
|
|
; Round 4 (key at offset 64).
	vmovdqa	xmm2,XMMWORD[64+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm2
|
|
	vaesenc	xmm6,xmm6,xmm2
|
|
	vaesenc	xmm7,xmm7,xmm2
|
|
	vaesenc	xmm11,xmm11,xmm2
|
|
	vaesenc	xmm12,xmm12,xmm2
|
|
	vaesenc	xmm13,xmm13,xmm2
|
|
|
|
; Round 5 (key at offset 80).
	vmovdqa	xmm1,XMMWORD[80+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm1
|
|
	vaesenc	xmm6,xmm6,xmm1
|
|
	vaesenc	xmm7,xmm7,xmm1
|
|
	vaesenc	xmm11,xmm11,xmm1
|
|
	vaesenc	xmm12,xmm12,xmm1
|
|
	vaesenc	xmm13,xmm13,xmm1
|
|
|
|
; Round 6 (key at offset 96).
	vmovdqa	xmm2,XMMWORD[96+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm2
|
|
	vaesenc	xmm6,xmm6,xmm2
|
|
	vaesenc	xmm7,xmm7,xmm2
|
|
	vaesenc	xmm11,xmm11,xmm2
|
|
	vaesenc	xmm12,xmm12,xmm2
|
|
	vaesenc	xmm13,xmm13,xmm2
|
|
|
|
; Round 7 (key at offset 112).
	vmovdqa	xmm1,XMMWORD[112+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm1
|
|
	vaesenc	xmm6,xmm6,xmm1
|
|
	vaesenc	xmm7,xmm7,xmm1
|
|
	vaesenc	xmm11,xmm11,xmm1
|
|
	vaesenc	xmm12,xmm12,xmm1
|
|
	vaesenc	xmm13,xmm13,xmm1
|
|
|
|
; Round 8 (key at offset 128).
	vmovdqa	xmm2,XMMWORD[128+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm2
|
|
	vaesenc	xmm6,xmm6,xmm2
|
|
	vaesenc	xmm7,xmm7,xmm2
|
|
	vaesenc	xmm11,xmm11,xmm2
|
|
	vaesenc	xmm12,xmm12,xmm2
|
|
	vaesenc	xmm13,xmm13,xmm2
|
|
|
|
; Round 9 (key at offset 144).
	vmovdqa	xmm1,XMMWORD[144+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm1
|
|
	vaesenc	xmm6,xmm6,xmm1
|
|
	vaesenc	xmm7,xmm7,xmm1
|
|
	vaesenc	xmm11,xmm11,xmm1
|
|
	vaesenc	xmm12,xmm12,xmm1
|
|
	vaesenc	xmm13,xmm13,xmm1
|
|
|
|
; Round 10 (key at offset 160).
	vmovdqa	xmm2,XMMWORD[160+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm2
|
|
	vaesenc	xmm6,xmm6,xmm2
|
|
	vaesenc	xmm7,xmm7,xmm2
|
|
	vaesenc	xmm11,xmm11,xmm2
|
|
	vaesenc	xmm12,xmm12,xmm2
|
|
	vaesenc	xmm13,xmm13,xmm2
|
|
|
|
; Round 11 (key at offset 176).
	vmovdqa	xmm1,XMMWORD[176+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm1
|
|
	vaesenc	xmm6,xmm6,xmm1
|
|
	vaesenc	xmm7,xmm7,xmm1
|
|
	vaesenc	xmm11,xmm11,xmm1
|
|
	vaesenc	xmm12,xmm12,xmm1
|
|
	vaesenc	xmm13,xmm13,xmm1
|
|
|
|
; Round 12 (key at offset 192).
	vmovdqa	xmm2,XMMWORD[192+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm2
|
|
	vaesenc	xmm6,xmm6,xmm2
|
|
	vaesenc	xmm7,xmm7,xmm2
|
|
	vaesenc	xmm11,xmm11,xmm2
|
|
	vaesenc	xmm12,xmm12,xmm2
|
|
	vaesenc	xmm13,xmm13,xmm2
|
|
|
|
; Round 13 (key at offset 208).
	vmovdqa	xmm1,XMMWORD[208+rdx]
|
|
	vaesenc	xmm4,xmm4,xmm1
|
|
	vaesenc	xmm6,xmm6,xmm1
|
|
	vaesenc	xmm7,xmm7,xmm1
|
|
	vaesenc	xmm11,xmm11,xmm1
|
|
	vaesenc	xmm12,xmm12,xmm1
|
|
	vaesenc	xmm13,xmm13,xmm1
|
|
|
|
; Final round (key at offset 224) uses vaesenclast.
	vmovdqa	xmm2,XMMWORD[224+rdx]
|
|
	vaesenclast	xmm4,xmm4,xmm2
|
|
	vaesenclast	xmm6,xmm6,xmm2
|
|
	vaesenclast	xmm7,xmm7,xmm2
|
|
	vaesenclast	xmm11,xmm11,xmm2
|
|
	vaesenclast	xmm12,xmm12,xmm2
|
|
	vaesenclast	xmm13,xmm13,xmm2
|
|
|
|
|
|
; Store the six encrypted blocks, in counter order 0..5, to the output
; buffer (aligned stores: rsi must be 16-byte aligned).
	vmovdqa	XMMWORD[rsi],xmm4
|
|
	vmovdqa	XMMWORD[16+rsi],xmm6
|
|
	vmovdqa	XMMWORD[32+rsi],xmm7
|
|
	vmovdqa	XMMWORD[48+rsi],xmm11
|
|
	vmovdqa	XMMWORD[64+rsi],xmm12
|
|
	vmovdqa	XMMWORD[80+rsi],xmm13
|
|
; Restore the caller's rdi/rsi from the slots written in the prologue.
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
|
|
	mov	rsi,QWORD[16+rsp]
|
|
	ret
|
|
|
|
; End-of-function marker, paired with $L$SEH_begin_aes256gcmsiv_kdf.
$L$SEH_end_aes256gcmsiv_kdf:
|
|
%else
|
|
; Work around https://bugzilla.nasm.us/show_bug.cgi?id=3392738
|
|
ret
|
|
%endif
|