658 lines
15 KiB
NASM
658 lines
15 KiB
NASM
|
|
; ---------------------------------------------------------------------------
|
|
; Copyright (c) 1998-2007, Brian Gladman, Worcester, UK. All rights reserved.
|
|
;
|
|
; LICENSE TERMS
|
|
;
|
|
; The free distribution and use of this software is allowed (with or without
|
|
; changes) provided that:
|
|
;
|
|
; 1. source code distributions include the above copyright notice, this
|
|
; list of conditions and the following disclaimer;
|
|
;
|
|
; 2. binary distributions include the above copyright notice, this list
|
|
; of conditions and the following disclaimer in their documentation;
|
|
;
|
|
; 3. the name of the copyright holder is not used to endorse products
|
|
; built using this software without specific written permission.
|
|
;
|
|
; DISCLAIMER
|
|
;
|
|
; This software is provided 'as is' with no explicit or implied warranties
|
|
; in respect of its properties, including, but not limited to, correctness
|
|
; and/or fitness for purpose.
|
|
; ---------------------------------------------------------------------------
|
|
; Issue 20/12/2007
|
|
;
|
|
; This code requires ASM_X86_V1C to be set in aesopt.h. It requires the C files
|
|
; aeskey.c and aestab.c for support.
|
|
|
|
;
|
|
; Adapted for TrueCrypt:
|
|
; - Compatibility with NASM and GCC
|
|
;
|
|
|
|
; An AES implementation for x86 processors using the YASM (or NASM) assembler.
|
|
; This is an assembler implementation that covers encryption and decryption
|
|
; only and is intended as a replacement of the C file aescrypt.c. It hence
|
|
; requires the file aeskey.c for keying and aestab.c for the AES tables. It
|
|
; employs full tables rather than compressed tables.
|
|
|
|
; This code provides the standard AES block size (128 bits, 16 bytes) and the
|
|
; three standard AES key sizes (128, 192 and 256 bits). It has the same call
|
|
; interface as my C implementation. The ebx, esi, edi and ebp registers are
|
|
; preserved across calls but eax, ecx and edx and the arithmetic status flags
|
|
; are not. It is also important that the defines below match those used in the
|
|
; C code. This code uses the VC++ register saving conventions; if it is used
|
|
; with another compiler, conventions for using and saving registers may need to
|
|
; be checked (and calling conventions). The YASM command line for the VC++
|
|
; custom build step is:
|
|
;
|
|
; yasm -Xvc -f win32 -o "$(TargetDir)\$(InputName).obj" "$(InputPath)"
|
|
;
|
|
; The calling interfaces are:
|
|
;
|
|
; AES_RETURN aes_encrypt(const unsigned char in_blk[],
|
|
; unsigned char out_blk[], const aes_encrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_decrypt(const unsigned char in_blk[],
|
|
; unsigned char out_blk[], const aes_decrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_encrypt_key<NNN>(const unsigned char key[],
|
|
; const aes_encrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_decrypt_key<NNN>(const unsigned char key[],
|
|
; const aes_decrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_encrypt_key(const unsigned char key[],
|
|
; unsigned int len, const aes_encrypt_ctx cx[1]);
|
|
;
|
|
; AES_RETURN aes_decrypt_key(const unsigned char key[],
|
|
; unsigned int len, const aes_decrypt_ctx cx[1]);
|
|
;
|
|
; where <NNN> is 128, 192 or 256. In the last two calls the length can be in
|
|
; either bits or bytes.
|
|
;
|
|
; Comment in/out the following lines to obtain the desired subroutines. These
|
|
; selections MUST match those in the C header file aes.h
|
|
|
|
; %define AES_128 ; define if AES with 128 bit keys is needed
|
|
; %define AES_192 ; define if AES with 192 bit keys is needed
|
|
%define AES_256 ; define if AES with 256 bit keys is needed
|
|
; %define AES_VAR ; define if a variable key size is needed
|
|
%define ENCRYPTION ; define if encryption is needed
|
|
%define DECRYPTION ; define if decryption is needed
|
|
%define AES_REV_DKS ; define if key decryption schedule is reversed
|
|
%define LAST_ROUND_TABLES ; define if tables are to be used for last round
|
|
|
|
; offsets to parameters
|
|
|
|
in_blk equ 4 ; input byte array address parameter
|
|
out_blk equ 8 ; output byte array address parameter
|
|
ctx equ 12 ; AES context structure
|
|
stk_spc equ 20 ; stack space
|
|
%define parms 12 ; parameter space on stack
|
|
|
|
; The encryption key schedule has the following in memory layout where N is the
|
|
; number of rounds (10, 12 or 14):
|
|
;
|
|
; lo: | input key (round 0) | ; each round is four 32-bit words
|
|
; | encryption round 1 |
|
|
; | encryption round 2 |
|
|
; ....
|
|
; | encryption round N-1 |
|
|
; hi: | encryption round N |
|
|
;
|
|
; The decryption key schedule is normally set up so that it has the same
|
|
; layout as above by actually reversing the order of the encryption key
|
|
; schedule in memory (this happens when AES_REV_DKS is set):
|
|
;
|
|
; lo: | decryption round 0 | = | encryption round N |
|
|
; | decryption round 1 | = INV_MIX_COL[ | encryption round N-1 | ]
|
|
; | decryption round 2 | = INV_MIX_COL[ | encryption round N-2 | ]
|
|
; .... ....
|
|
; | decryption round N-1 | = INV_MIX_COL[ | encryption round 1 | ]
|
|
; hi: | decryption round N | = | input key (round 0) |
|
|
;
|
|
; with rounds except the first and last modified using inv_mix_column()
|
|
; But if AES_REV_DKS is NOT set the order of keys is left as it is for
|
|
; encryption so that it has to be accessed in reverse when used for
|
|
; decryption (although the inverse mix column modifications are done)
|
|
;
|
|
; lo: | decryption round 0 | = | input key (round 0) |
|
|
; | decryption round 1 | = INV_MIX_COL[ | encryption round 1 | ]
|
|
; | decryption round 2 | = INV_MIX_COL[ | encryption round 2 | ]
|
|
; .... ....
|
|
; | decryption round N-1 | = INV_MIX_COL[ | encryption round N-1 | ]
|
|
; hi: | decryption round N | = | encryption round N |
|
|
;
|
|
; This layout is faster when the assembler key scheduling provided here
|
|
; is used.
|
|
;
|
|
; The DLL interface must use the _stdcall convention in which the number
|
|
; of bytes of parameter space is added after an @ to the routine's name.
|
|
; We must also remove our parameters from the stack before return (see
|
|
; the do_exit macro). Define DLL_EXPORT for the Dynamic Link Library version.
|
|
|
|
;%define DLL_EXPORT
|
|
|
|
; End of user defines
|
|
|
|
%ifdef AES_VAR
|
|
%ifndef AES_128
|
|
%define AES_128
|
|
%endif
|
|
%ifndef AES_192
|
|
%define AES_192
|
|
%endif
|
|
%ifndef AES_256
|
|
%define AES_256
|
|
%endif
|
|
%endif
|
|
|
|
%ifdef AES_VAR
|
|
%define KS_LENGTH 60
|
|
%elifdef AES_256
|
|
%define KS_LENGTH 60
|
|
%elifdef AES_192
|
|
%define KS_LENGTH 52
|
|
%else
|
|
%define KS_LENGTH 44
|
|
%endif
|
|
|
|
; These macros implement stack based local variables
|
|
|
|
%macro save 2
|
|
mov [esp+4*%1],%2
|
|
%endmacro
|
|
|
|
%macro restore 2
|
|
mov %1,[esp+4*%2]
|
|
%endmacro
|
|
|
|
; the DLL has to implement the _stdcall calling interface on return
|
|
; In this case we have to take our parameters (3 4-byte pointers)
|
|
; off the stack
|
|
|
|
%macro do_name 1-2 parms
|
|
%ifndef DLL_EXPORT
|
|
align 32
|
|
global %1
|
|
%1:
|
|
%else
|
|
align 32
|
|
global %1@%2
|
|
export _%1@%2
|
|
%1@%2:
|
|
%endif
|
|
%endmacro
|
|
|
|
%macro do_call 1-2 parms
|
|
%ifndef DLL_EXPORT
|
|
call %1
|
|
add esp,%2
|
|
%else
|
|
call %1@%2
|
|
%endif
|
|
%endmacro
|
|
|
|
%macro do_exit 0-1 parms
|
|
%ifdef DLL_EXPORT
|
|
ret %1
|
|
%else
|
|
ret
|
|
%endif
|
|
%endmacro
|
|
|
|
%ifdef ENCRYPTION
|
|
|
|
extern t_fn
|
|
|
|
%define etab_0(x) [t_fn+4*x]
|
|
%define etab_1(x) [t_fn+1024+4*x]
|
|
%define etab_2(x) [t_fn+2048+4*x]
|
|
%define etab_3(x) [t_fn+3072+4*x]
|
|
|
|
%ifdef LAST_ROUND_TABLES
|
|
|
|
extern t_fl
|
|
|
|
%define eltab_0(x) [t_fl+4*x]
|
|
%define eltab_1(x) [t_fl+1024+4*x]
|
|
%define eltab_2(x) [t_fl+2048+4*x]
|
|
%define eltab_3(x) [t_fl+3072+4*x]
|
|
|
|
%else
|
|
|
|
%define etab_b(x) byte [t_fn+3072+4*x]
|
|
|
|
%endif
|
|
|
|
; ROUND FUNCTION. Build column[2] on ESI and column[3] on EDI that have the
|
|
; round keys pre-loaded. Build column[0] in EBP and column[1] in EBX.
|
|
;
|
|
; Input:
|
|
;
|
|
; EAX column[0]
|
|
; EBX column[1]
|
|
; ECX column[2]
|
|
; EDX column[3]
|
|
; ESI column key[round][2]
|
|
; EDI column key[round][3]
|
|
; EBP scratch
|
|
;
|
|
; Output:
|
|
;
|
|
; EBP column[0] unkeyed
|
|
; EBX column[1] unkeyed
|
|
; ESI column[2] keyed
|
|
; EDI column[3] keyed
|
|
; EAX scratch
|
|
; ECX scratch
|
|
; EDX scratch
|
|
|
|
%macro rnd_fun 2
|
|
|
|
rol ebx,16
|
|
%1 esi, cl, 0, ebp
|
|
%1 esi, dh, 1, ebp
|
|
%1 esi, bh, 3, ebp
|
|
%1 edi, dl, 0, ebp
|
|
%1 edi, ah, 1, ebp
|
|
%1 edi, bl, 2, ebp
|
|
%2 ebp, al, 0, ebp
|
|
shr ebx,16
|
|
and eax,0xffff0000
|
|
or eax,ebx
|
|
shr edx,16
|
|
%1 ebp, ah, 1, ebx
|
|
%1 ebp, dh, 3, ebx
|
|
%2 ebx, dl, 2, ebx
|
|
%1 ebx, ch, 1, edx
|
|
%1 ebx, al, 0, edx
|
|
shr eax,16
|
|
shr ecx,16
|
|
%1 ebp, cl, 2, edx
|
|
%1 edi, ch, 3, edx
|
|
%1 esi, al, 2, edx
|
|
%1 ebx, ah, 3, edx
|
|
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for normal rounds
|
|
|
|
%macro nr_xor 4
|
|
movzx %4,%2
|
|
xor %1,etab_%3(%4)
|
|
%endmacro
|
|
|
|
%macro nr_mov 4
|
|
movzx %4,%2
|
|
mov %1,etab_%3(%4)
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for last round
|
|
|
|
%ifdef LAST_ROUND_TABLES
|
|
|
|
%macro lr_xor 4
|
|
movzx %4,%2
|
|
xor %1,eltab_%3(%4)
|
|
%endmacro
|
|
|
|
%macro lr_mov 4
|
|
movzx %4,%2
|
|
mov %1,eltab_%3(%4)
|
|
%endmacro
|
|
|
|
%else
|
|
|
|
%macro lr_xor 4
|
|
movzx %4,%2
|
|
movzx %4,etab_b(%4)
|
|
%if %3 != 0
|
|
shl %4,8*%3
|
|
%endif
|
|
xor %1,%4
|
|
%endmacro
|
|
|
|
%macro lr_mov 4
|
|
movzx %4,%2
|
|
movzx %1,etab_b(%4)
|
|
%if %3 != 0
|
|
shl %1,8*%3
|
|
%endif
|
|
%endmacro
|
|
|
|
%endif
|
|
|
|
%macro enc_round 0
|
|
|
|
add ebp,16
|
|
save 0,ebp
|
|
mov esi,[ebp+8]
|
|
mov edi,[ebp+12]
|
|
|
|
rnd_fun nr_xor, nr_mov
|
|
|
|
mov eax,ebp
|
|
mov ecx,esi
|
|
mov edx,edi
|
|
restore ebp,0
|
|
xor eax,[ebp]
|
|
xor ebx,[ebp+4]
|
|
|
|
%endmacro
|
|
|
|
%macro enc_last_round 0
|
|
|
|
add ebp,16
|
|
save 0,ebp
|
|
mov esi,[ebp+8]
|
|
mov edi,[ebp+12]
|
|
|
|
rnd_fun lr_xor, lr_mov
|
|
|
|
mov eax,ebp
|
|
restore ebp,0
|
|
xor eax,[ebp]
|
|
xor ebx,[ebp+4]
|
|
|
|
%endmacro
|
|
|
|
section .text align=32
|
|
|
|
; AES Encryption Subroutine
|
|
|
|
do_name aes_encrypt
|
|
|
|
sub esp,stk_spc
|
|
mov [esp+16],ebp
|
|
mov [esp+12],ebx
|
|
mov [esp+ 8],esi
|
|
mov [esp+ 4],edi
|
|
|
|
mov esi,[esp+in_blk+stk_spc] ; input pointer
|
|
mov eax,[esi ]
|
|
mov ebx,[esi+ 4]
|
|
mov ecx,[esi+ 8]
|
|
mov edx,[esi+12]
|
|
|
|
mov ebp,[esp+ctx+stk_spc] ; key pointer
|
|
movzx edi,byte [ebp+4*KS_LENGTH]
|
|
xor eax,[ebp ]
|
|
xor ebx,[ebp+ 4]
|
|
xor ecx,[ebp+ 8]
|
|
xor edx,[ebp+12]
|
|
|
|
; determine the number of rounds
|
|
|
|
cmp edi,10*16
|
|
je .3
|
|
cmp edi,12*16
|
|
je .2
|
|
cmp edi,14*16
|
|
je .1
|
|
mov eax,-1
|
|
jmp .5
|
|
|
|
.1: enc_round
|
|
enc_round
|
|
.2: enc_round
|
|
enc_round
|
|
.3: enc_round
|
|
enc_round
|
|
enc_round
|
|
enc_round
|
|
enc_round
|
|
enc_round
|
|
enc_round
|
|
enc_round
|
|
enc_round
|
|
enc_last_round
|
|
|
|
mov edx,[esp+out_blk+stk_spc]
|
|
mov [edx],eax
|
|
mov [edx+4],ebx
|
|
mov [edx+8],esi
|
|
mov [edx+12],edi
|
|
xor eax,eax
|
|
|
|
.5: mov ebp,[esp+16]
|
|
mov ebx,[esp+12]
|
|
mov esi,[esp+ 8]
|
|
mov edi,[esp+ 4]
|
|
add esp,stk_spc
|
|
do_exit
|
|
|
|
%endif
|
|
|
|
%ifdef DECRYPTION
|
|
|
|
extern t_in
|
|
|
|
%define dtab_0(x) [t_in+4*x]
|
|
%define dtab_1(x) [t_in+1024+4*x]
|
|
%define dtab_2(x) [t_in+2048+4*x]
|
|
%define dtab_3(x) [t_in+3072+4*x]
|
|
|
|
%ifdef LAST_ROUND_TABLES
|
|
|
|
extern t_il
|
|
|
|
%define dltab_0(x) [t_il+4*x]
|
|
%define dltab_1(x) [t_il+1024+4*x]
|
|
%define dltab_2(x) [t_il+2048+4*x]
|
|
%define dltab_3(x) [t_il+3072+4*x]
|
|
|
|
%else
|
|
|
|
extern _t_ibox
|
|
|
|
%define dtab_x(x) byte [_t_ibox+x]
|
|
|
|
%endif
|
|
|
|
%macro irn_fun 2
|
|
|
|
rol eax,16
|
|
%1 esi, cl, 0, ebp
|
|
%1 esi, bh, 1, ebp
|
|
%1 esi, al, 2, ebp
|
|
%1 edi, dl, 0, ebp
|
|
%1 edi, ch, 1, ebp
|
|
%1 edi, ah, 3, ebp
|
|
%2 ebp, bl, 0, ebp
|
|
shr eax,16
|
|
and ebx,0xffff0000
|
|
or ebx,eax
|
|
shr ecx,16
|
|
%1 ebp, bh, 1, eax
|
|
%1 ebp, ch, 3, eax
|
|
%2 eax, cl, 2, ecx
|
|
%1 eax, bl, 0, ecx
|
|
%1 eax, dh, 1, ecx
|
|
shr ebx,16
|
|
shr edx,16
|
|
%1 esi, dh, 3, ecx
|
|
%1 ebp, dl, 2, ecx
|
|
%1 eax, bh, 3, ecx
|
|
%1 edi, bl, 2, ecx
|
|
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for normal rounds
|
|
|
|
%macro ni_xor 4
|
|
movzx %4,%2
|
|
xor %1,dtab_%3(%4)
|
|
%endmacro
|
|
|
|
%macro ni_mov 4
|
|
movzx %4,%2
|
|
mov %1,dtab_%3(%4)
|
|
%endmacro
|
|
|
|
; Basic MOV and XOR Operations for last round
|
|
|
|
%ifdef LAST_ROUND_TABLES
|
|
|
|
%macro li_xor 4
|
|
movzx %4,%2
|
|
xor %1,dltab_%3(%4)
|
|
%endmacro
|
|
|
|
%macro li_mov 4
|
|
movzx %4,%2
|
|
mov %1,dltab_%3(%4)
|
|
%endmacro
|
|
|
|
%else
|
|
|
|
%macro li_xor 4
|
|
movzx %4,%2
|
|
movzx %4,dtab_x(%4)
|
|
%if %3 != 0
|
|
shl %4,8*%3
|
|
%endif
|
|
xor %1,%4
|
|
%endmacro
|
|
|
|
%macro li_mov 4
|
|
movzx %4,%2
|
|
movzx %1,dtab_x(%4)
|
|
%if %3 != 0
|
|
shl %1,8*%3
|
|
%endif
|
|
%endmacro
|
|
|
|
%endif
|
|
|
|
%macro dec_round 0
|
|
|
|
%ifdef AES_REV_DKS
|
|
add ebp,16
|
|
%else
|
|
sub ebp,16
|
|
%endif
|
|
save 0,ebp
|
|
mov esi,[ebp+8]
|
|
mov edi,[ebp+12]
|
|
|
|
irn_fun ni_xor, ni_mov
|
|
|
|
mov ebx,ebp
|
|
mov ecx,esi
|
|
mov edx,edi
|
|
restore ebp,0
|
|
xor eax,[ebp]
|
|
xor ebx,[ebp+4]
|
|
|
|
%endmacro
|
|
|
|
%macro dec_last_round 0
|
|
|
|
%ifdef AES_REV_DKS
|
|
add ebp,16
|
|
%else
|
|
sub ebp,16
|
|
%endif
|
|
save 0,ebp
|
|
mov esi,[ebp+8]
|
|
mov edi,[ebp+12]
|
|
|
|
irn_fun li_xor, li_mov
|
|
|
|
mov ebx,ebp
|
|
restore ebp,0
|
|
xor eax,[ebp]
|
|
xor ebx,[ebp+4]
|
|
|
|
%endmacro
|
|
|
|
section .text
|
|
|
|
; AES Decryption Subroutine
|
|
|
|
do_name aes_decrypt
|
|
|
|
sub esp,stk_spc
|
|
mov [esp+16],ebp
|
|
mov [esp+12],ebx
|
|
mov [esp+ 8],esi
|
|
mov [esp+ 4],edi
|
|
|
|
; input four columns and xor in first round key
|
|
|
|
mov esi,[esp+in_blk+stk_spc] ; input pointer
|
|
mov eax,[esi ]
|
|
mov ebx,[esi+ 4]
|
|
mov ecx,[esi+ 8]
|
|
mov edx,[esi+12]
|
|
lea esi,[esi+16]
|
|
|
|
mov ebp,[esp+ctx+stk_spc] ; key pointer
|
|
movzx edi,byte[ebp+4*KS_LENGTH]
|
|
%ifndef AES_REV_DKS ; if decryption key schedule is not reversed
|
|
lea ebp,[ebp+edi] ; we have to access it from the top down
|
|
%endif
|
|
xor eax,[ebp ] ; key schedule
|
|
xor ebx,[ebp+ 4]
|
|
xor ecx,[ebp+ 8]
|
|
xor edx,[ebp+12]
|
|
|
|
; determine the number of rounds
|
|
|
|
cmp edi,10*16
|
|
je .3
|
|
cmp edi,12*16
|
|
je .2
|
|
cmp edi,14*16
|
|
je .1
|
|
mov eax,-1
|
|
jmp .5
|
|
|
|
.1: dec_round
|
|
dec_round
|
|
.2: dec_round
|
|
dec_round
|
|
.3: dec_round
|
|
dec_round
|
|
dec_round
|
|
dec_round
|
|
dec_round
|
|
dec_round
|
|
dec_round
|
|
dec_round
|
|
dec_round
|
|
dec_last_round
|
|
|
|
; move final values to the output array.
|
|
|
|
mov ebp,[esp+out_blk+stk_spc]
|
|
mov [ebp],eax
|
|
mov [ebp+4],ebx
|
|
mov [ebp+8],esi
|
|
mov [ebp+12],edi
|
|
xor eax,eax
|
|
|
|
.5: mov ebp,[esp+16]
|
|
mov ebx,[esp+12]
|
|
mov esi,[esp+ 8]
|
|
mov edi,[esp+ 4]
|
|
add esp,stk_spc
|
|
do_exit
|
|
|
|
%endif
|
|
|
|
%ifidn __OUTPUT_FORMAT__,elf
|
|
section .note.GNU-stack noalloc noexec nowrite progbits
|
|
%endif
|
|
%ifidn __OUTPUT_FORMAT__,elf32
|
|
section .note.GNU-stack noalloc noexec nowrite progbits
|
|
%endif
|
|
%ifidn __OUTPUT_FORMAT__,elf64
|
|
section .note.GNU-stack noalloc noexec nowrite progbits
|
|
%endif
|
|
|