Cryptography: Optimize Whirlpool implementation by using public domain assembly code developed by Wei Dai

This commit is contained in:
Mounir IDRASSI 2015-12-30 06:57:34 +01:00
parent d462806ea9
commit 0c2c0a846d
17 changed files with 1547 additions and 1170 deletions

View File

@ -581,8 +581,8 @@ void derive_key_ripemd160 (char *pwd, int pwd_len, char *salt, int salt_len, uin
typedef struct hmac_whirlpool_ctx_struct
{
WHIRLPOOL_CTX ctx;
char buf[WHIRLPOOL_BLOCKSIZE];
char k[PKCS5_SALT_SIZE + 4]; /* enough to hold (salt_len + 4) and also the Whirlpool hash */
CRYPTOPP_ALIGN_DATA(16) char buf[WHIRLPOOL_BLOCKSIZE];
CRYPTOPP_ALIGN_DATA(16) char k[PKCS5_SALT_SIZE + 4]; /* enough to hold (salt_len + 4) and also the Whirlpool hash */
char u[WHIRLPOOL_DIGESTSIZE];
} hmac_whirlpool_ctx;

View File

@ -52,6 +52,7 @@ typedef unsigned __int32 TC_LARGEST_COMPILER_UINT;
typedef unsigned __int64 TC_LARGEST_COMPILER_UINT;
typedef __int64 int64;
typedef unsigned __int64 uint64;
#define LL(x) x##ui64
#endif
#else // !_MSC_VER
@ -68,6 +69,8 @@ typedef uint16_t uint16;
typedef uint32_t uint32;
typedef uint64_t uint64;
#define LL(x) x##ULL
#if UCHAR_MAX != 0xffU
#error UCHAR_MAX != 0xff
#endif

View File

@ -426,6 +426,10 @@
RelativePath=".\Aestab.c"
>
</File>
<File
RelativePath=".\cpu.c"
>
</File>
<File
RelativePath=".\Rmd160.c"
>
@ -468,6 +472,18 @@
RelativePath=".\Aestab.h"
>
</File>
<File
RelativePath=".\config.h"
>
</File>
<File
RelativePath=".\cpu.h"
>
</File>
<File
RelativePath=".\misc.h"
>
</File>
<File
RelativePath=".\Rmd160.h"
>

View File

@ -69,6 +69,7 @@
*/
#include "Common/Endian.h"
#include "Crypto/misc.h"
#define PLATFORM_BYTE_ORDER BYTE_ORDER
#define IS_LITTLE_ENDIAN LITTLE_ENDIAN
@ -89,18 +90,6 @@ extern "C"
#pragma intrinsic(memcpy)
#endif
#if 0 && defined(_MSC_VER)
#define rotl32 _lrotl
#define rotr32 _lrotr
#else
#define rotl32(x,n) (((x) << n) | ((x) >> (32 - n)))
#define rotr32(x,n) (((x) >> n) | ((x) << (32 - n)))
#endif
#if !defined(bswap_32)
#define bswap_32(x) ((rotr32((x), 24) & 0x00ff00ff) | (rotr32((x), 8) & 0xff00ff00))
#endif
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define SWAP_BYTES
#else
@ -427,12 +416,6 @@ VOID_RETURN sha256(unsigned char hval[], const unsigned char data[], unsigned lo
#define SHA512_MASK (SHA512_BLOCK_SIZE - 1)
#define rotr64(x,n) (((x) >> n) | ((x) << (64 - n)))
#if !defined(bswap_64)
#define bswap_64(x) (((uint_64t)(bswap_32((uint_32t)(x)))) << 32 | bswap_32((uint_32t)((x) >> 32)))
#endif
#if defined(SWAP_BYTES)
#define bsw_64(p,n) \
{ int _i = (n); while(_i--) ((uint_64t*)p)[_i] = bswap_64(((uint_64t*)p)[_i]); }

View File

@ -12,6 +12,7 @@ SOURCES = \
Aes_hw_cpu.asm \
Aeskey.c \
Aestab.c \
cpu.c \
Rmd160.c \
Serpent.c \
Sha2.c \

File diff suppressed because it is too large Load Diff

View File

@ -2,147 +2,23 @@
#define WHIRLPOOL_H 1
#include "Common/Tcdefs.h"
#include "config.h"
#if defined(__cplusplus)
extern "C"
{
#endif
#ifndef PORTABLE_C__
#define PORTABLE_C__
#include <limits.h>
/* Definition of minimum-width integer types
*
* u8 -> unsigned integer type, at least 8 bits, equivalent to unsigned char
* u16 -> unsigned integer type, at least 16 bits
* u32 -> unsigned integer type, at least 32 bits
*
* s8, s16, s32 -> signed counterparts of u8, u16, u32
*
* Always use the macros T8(), T16() or T32() to obtain exact-width results,
* i.e., to specify the size of the result of each expression.
*/
typedef signed char s8;
typedef unsigned char u8;
#if UINT_MAX >= 4294967295UL
typedef signed short s16;
typedef signed int s32;
typedef unsigned short u16;
typedef unsigned int u32;
#define ONE32 0xffffffffU
#else
typedef signed int s16;
typedef signed long s32;
typedef unsigned int u16;
typedef unsigned __int32 u32;
#define ONE32 0xffffffffUL
#endif
#define ONE8 0xffU
#define ONE16 0xffffU
#define T8(x) ((x) & ONE8)
#define T16(x) ((x) & ONE16)
#define T32(x) ((x) & ONE32)
#ifdef _MSC_VER
typedef unsigned __int64 u64;
typedef signed __int64 s64;
#define LL(v) (v##ui64)
#define ONE64 LL(0xffffffffffffffff)
#else /* !_MSC_VER */
typedef unsigned long long u64;
typedef signed long long s64;
#define LL(v) (v##ULL)
#define ONE64 LL(0xffffffffffffffff)
#endif /* ?_MSC_VER */
#define T64(x) ((x) & ONE64)
#define ROTR64(v, n) (((v) >> (n)) | T64((v) << (64 - (n))))
/*
* Note: the test is used to detect native 64-bit architectures;
* if the unsigned long is strictly greater than 32-bit, it is
* assumed to be at least 64-bit. This will not work correctly
* on (old) 36-bit architectures (PDP-10 for instance).
*
* On non-64-bit architectures, "long long" is used.
*/
/*
* U8TO32_BIG(c) returns the 32-bit value stored in big-endian convention
* in the unsigned char array pointed to by c.
*/
#define U8TO32_BIG(c) (((u32)T8(*(c)) << 24) | ((u32)T8(*((c) + 1)) << 16) | ((u32)T8(*((c) + 2)) << 8) | ((u32)T8(*((c) + 3))))
/*
* U8TO32_LITTLE(c) returns the 32-bit value stored in little-endian convention
* in the unsigned char array pointed to by c: c[0] supplies the least
* significant byte and c[3] the most significant one.
*
* Every term is individually parenthesised so that the expansion is a single
* balanced expression (the third term previously lacked its opening
* parenthesis, which left a stray ')' at the end of the expansion).
*/
#define U8TO32_LITTLE(c) (((u32)T8(*(c))) | ((u32)T8(*((c) + 1)) << 8) | ((u32)T8(*((c) + 2)) << 16) | ((u32)T8(*((c) + 3)) << 24))
/*
* U32TO8_BIG(c, v) stores the 32-bit value v in big-endian convention
* into the unsigned char array pointed to by c.
*/
#define U32TO8_BIG(c, v) do { u32 x = (v); u8 *d = (c); d[0] = T8(x >> 24); d[1] = T8(x >> 16); d[2] = T8(x >> 8); d[3] = T8(x); } while (0)
/*
* U32TO8_LITTLE(c, v) stores the 32-bit value v in little-endian convention
* into the unsigned char array pointed to by c.
*/
#define U32TO8_LITTLE(c, v) do { u32 x = (v); u8 *d = (c); d[0] = T8(x); d[1] = T8(x >> 8); d[2] = T8(x >> 16); d[3] = T8(x >> 24); } while (0)
/*
* ROTL32(v, n) returns the value of the 32-bit unsigned value v after
* a rotation of n bits to the left. It might be replaced by the appropriate
* architecture-specific macro.
*
* It evaluates v and n twice.
*
* The compiler might emit a warning if n is the constant 0. The result
* is undefined if n is greater than 31.
*/
#define ROTL32(v, n) (T32((v) << (n)) | ((v) >> (32 - (n))))
/*
* Whirlpool-specific definitions.
*/
#define DIGESTBYTES 64
#define DIGESTBITS (8*DIGESTBYTES) /* 512 */
#define WBLOCKBYTES 64
#define WBLOCKBITS (8*WBLOCKBYTES) /* 512 */
#define LENGTHBYTES 32
#define LENGTHBITS (8*LENGTHBYTES) /* 256 */
typedef struct NESSIEstruct {
u8 bitLength[LENGTHBYTES]; /* global number of hashed bits (256-bit counter) */
u8 buffer[WBLOCKBYTES]; /* buffer of data to hash */
int bufferBits; /* current number of bits on the buffer */
int bufferPos; /* current (possibly incomplete) byte slot on the buffer */
u64 hash[DIGESTBYTES/8]; /* the hashing state */
} NESSIEstruct;
#endif /* PORTABLE_C__ */
typedef struct WHIRLPOOL_CTX {
uint64 countLo;
uint64 countHi;
CRYPTOPP_ALIGN_DATA(16) uint64 data[8];
CRYPTOPP_ALIGN_DATA(16) uint64 state[8];
} WHIRLPOOL_CTX;
// -------------
#if defined(__cplusplus)
extern "C" {
#endif
typedef NESSIEstruct WHIRLPOOL_CTX;
void WHIRLPOOL_add(const unsigned char * const source, unsigned __int32 sourceBits, struct NESSIEstruct * const structpointer);
void WHIRLPOOL_finalize(struct NESSIEstruct * const structpointer, unsigned char * const result);
void WHIRLPOOL_init(struct NESSIEstruct * const structpointer);
void WHIRLPOOL_add(const unsigned char * source, unsigned __int32 sourceBits, WHIRLPOOL_CTX * const ctx);
void WHIRLPOOL_finalize(WHIRLPOOL_CTX* const ctx, unsigned char * result);
void WHIRLPOOL_init(WHIRLPOOL_CTX* const ctx);
#if defined(__cplusplus)
}

138
src/Crypto/config.h Normal file
View File

@ -0,0 +1,138 @@
#ifndef CRYPTOPP_CONFIG_H
#define CRYPTOPP_CONFIG_H
// VC_INLINE: qualifier for small helper functions that are defined in headers.
// GCC-compatible compilers get "static inline" plus the always_inline attribute,
// MSVC gets __forceinline (note: without "static"), and any other compiler
// falls back to a plain "static inline".
#ifdef __GNUC__
#define VC_INLINE static inline __attribute__((always_inline))
#elif defined (_MSC_VER)
#define VC_INLINE __forceinline
#else
#define VC_INLINE static inline
#endif
#ifdef __GNUC__
#define CRYPTOPP_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#endif
#ifndef CRYPTOPP_L1_CACHE_LINE_SIZE
// This should be a lower bound on the L1 cache line size. It's used for defense against timing attacks.
#if defined(_M_X64) || defined(__x86_64__)
#define CRYPTOPP_L1_CACHE_LINE_SIZE 64
#else
// L1 cache line size is 32 on Pentium III and earlier
#define CRYPTOPP_L1_CACHE_LINE_SIZE 32
#endif
#endif
// CRYPTOPP_ALIGN_DATA(x): declaration prefix requesting x-byte alignment.
// It can be pre-defined by the build to override the choice made here.
// On compilers other than MSVC and GCC-compatible ones it expands to nothing,
// i.e. no alignment is requested there.
#ifndef CRYPTOPP_ALIGN_DATA
#if defined(_MSC_VER)
#define CRYPTOPP_ALIGN_DATA(x) __declspec(align(x))
#elif defined(__GNUC__)
#define CRYPTOPP_ALIGN_DATA(x) __attribute__((aligned(x)))
#else
#define CRYPTOPP_ALIGN_DATA(x)
#endif
#endif
#ifndef CRYPTOPP_SECTION_ALIGN16
#if defined(__GNUC__) && !defined(__APPLE__)
// the alignment attribute doesn't seem to work without this section attribute when -fdata-sections is turned on
#define CRYPTOPP_SECTION_ALIGN16 __attribute__((section ("CryptoPP_Align16")))
#else
#define CRYPTOPP_SECTION_ALIGN16
#endif
#endif
#if defined(_MSC_VER) || defined(__fastcall)
#define CRYPTOPP_FASTCALL __fastcall
#else
#define CRYPTOPP_FASTCALL
#endif
#ifdef CRYPTOPP_DISABLE_X86ASM // for backwards compatibility: this macro had both meanings
#define CRYPTOPP_DISABLE_ASM
#define CRYPTOPP_DISABLE_SSE2
#endif
#if !defined(CRYPTOPP_DISABLE_ASM) && ((defined(_MSC_VER) && defined(_M_IX86)) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__))))
// C++Builder 2010 does not allow "call label" where label is defined within inline assembly
#define CRYPTOPP_X86_ASM_AVAILABLE
#if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(_MSC_VER) || CRYPTOPP_GCC_VERSION >= 30300)
#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 0
#endif
// SSSE3 was actually introduced in GNU as 2.17, which was released 6/23/2006, but we can't tell what version of binutils is installed.
// GCC 4.1.2 was released on 2/13/2007, so we'll use that as a proxy for the binutils version.
#if !defined(CRYPTOPP_DISABLE_SSSE3) && (_MSC_VER >= 1400 || CRYPTOPP_GCC_VERSION >= 40102)
#define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSSE3_ASM_AVAILABLE 0
#endif
#endif
#if !defined(CRYPTOPP_DISABLE_ASM) && defined(_MSC_VER) && defined(_M_X64)
#define CRYPTOPP_X64_MASM_AVAILABLE
#endif
#if !defined(CRYPTOPP_DISABLE_ASM) && defined(__GNUC__) && defined(__x86_64__)
#define CRYPTOPP_X64_ASM_AVAILABLE
#endif
#if !defined(CRYPTOPP_DISABLE_SSE2) && (defined(_MSC_VER) || defined(__SSE2__))
#define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE 0
#endif
#if !defined(CRYPTOPP_DISABLE_SSSE3) && !defined(CRYPTOPP_DISABLE_AESNI) && CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && (CRYPTOPP_GCC_VERSION >= 40400 || _MSC_FULL_VER >= 150030729 || __INTEL_COMPILER >= 1110)
#define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 1
#else
#define CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE 0
#endif
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE || CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE || defined(CRYPTOPP_X64_MASM_AVAILABLE)
#define CRYPTOPP_BOOL_ALIGN16_ENABLED 1
#else
#define CRYPTOPP_BOOL_ALIGN16_ENABLED 0
#endif
// how to allocate 16-byte aligned memory (for SSE2)
#if defined(_MSC_VER)
#define CRYPTOPP_MM_MALLOC_AVAILABLE
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__)
#define CRYPTOPP_MALLOC_ALIGNMENT_IS_16
#elif defined(__linux__) || defined(__sun__) || defined(__CYGWIN__)
#define CRYPTOPP_MEMALIGN_AVAILABLE
#else
#define CRYPTOPP_NO_ALIGNED_ALLOC
#endif
// how to declare class constants
#if (defined(_MSC_VER) && _MSC_VER <= 1300) || defined(__INTEL_COMPILER)
# define CRYPTOPP_CONSTANT(x) enum {x};
#else
# define CRYPTOPP_CONSTANT(x) static const int x;
#endif
#if defined(_M_X64) || defined(__x86_64__)
#define CRYPTOPP_BOOL_X64 1
#else
#define CRYPTOPP_BOOL_X64 0
#endif
// see http://predef.sourceforge.net/prearch.html
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(_X86_) || defined(__I86__) || defined(__INTEL__)
#define CRYPTOPP_BOOL_X86 1
#else
#define CRYPTOPP_BOOL_X86 0
#endif
#if CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86 || defined(__powerpc__)
#define CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#endif
// GETBYTE(x, y): byte number y of x (y == 0 selects the least significant byte), widened to unsigned int.
#define GETBYTE(x, y) ((unsigned int)(((x) >> (8 * (y))) & 0xffU))
#endif

198
src/Crypto/cpu.c Normal file
View File

@ -0,0 +1,198 @@
/* cpu.c - written and placed in the public domain by Wei Dai */
#include "cpu.h"
#include "misc.h"
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#include <signal.h>
#include <setjmp.h>
#endif
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
#include <emmintrin.h>
#endif
#ifdef CRYPTOPP_CPUID_AVAILABLE
#if _MSC_VER >= 1400 && CRYPTOPP_BOOL_X64
/* MSVC x64 variant: inline assembly is not used here, so the query goes
   through the compiler's __cpuid intrinsic. Always reports success (1). */
int CpuId(uint32 input, uint32 *output)
{
	/* __cpuid wants an int array; output supplies the four 32-bit slots */
	int *regs = (int *)output;

	__cpuid(regs, input);
	return 1;
}
#else
#ifndef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#if defined(__cplusplus)
extern "C" {
#endif
typedef void (*SigHandler)(int);
static jmp_buf s_jmpNoCPUID;
static void SigIllHandlerCPUID(int p)
{
longjmp(s_jmpNoCPUID, 1);
}
#if CRYPTOPP_BOOL_X64 == 0
/* Jump target armed by TrySSE2() before it executes an SSE2 instruction. */
static jmp_buf s_jmpNoSSE2;

/* SIGILL handler used while probing SSE2: unwinds back to the setjmp()
   point, which then sees a non-zero return value. */
static void SigIllHandlerSSE2(int signum)
{
	(void)signum;	/* the signal number itself is not needed */
	longjmp(s_jmpNoSSE2, 1);
}
#endif
#if defined(__cplusplus)
}
#endif
#endif
/*
 * Executes the CPUID instruction with EAX = input and stores the resulting
 * EAX, EBX, ECX, EDX values in output[0], output[1], output[2], output[3].
 *
 * Returns 1 on success. Returns 0 when executing the instruction raised an
 * exception (MS-style build) or SIGILL (other builds), or when the temporary
 * SIGILL handler could not be installed; output is not written in the
 * latter case.
 *
 * NOTE(review): ECX is not loaded before CPUID is executed, so leaves that
 * take a sub-leaf index in ECX would see whatever the register happens to
 * hold - the callers in this file only query leaves 0, 1, 0x80000000,
 * 0x80000001 and 0x80000005.
 */
int CpuId(uint32 input, uint32 *output)
{
#ifdef CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
// structured exception handling turns a faulting CPUID into "return 0"
__try
{
__asm
{
mov eax, input
cpuid
mov edi, output
mov [edi], eax
mov [edi+4], ebx
mov [edi+8], ecx
mov [edi+12], edx
}
}
__except (1)
{
return 0;
}
return 1;
#else
// temporarily route SIGILL to a handler that longjmp()s back to the setjmp() below
SigHandler oldHandler = signal(SIGILL, SigIllHandlerCPUID);
if (oldHandler == SIG_ERR)
return 0;
int result = 1;
// setjmp() returns non-zero only when SigIllHandlerCPUID jumped back here
if (setjmp(s_jmpNoCPUID))
result = 0;
else
{
// EBX is copied to EDI and handed back through the "=D" output, so EBX itself is left unchanged
// NOTE(review): push/pop inside the asm moves the stack pointer without the compiler knowing - confirm this is safe with the x86-64 red zone
asm
(
// save ebx in case -fPIC is being used
#if CRYPTOPP_BOOL_X86
"push %%ebx; cpuid; mov %%ebx, %%edi; pop %%ebx"
#else
"pushq %%rbx; cpuid; mov %%ebx, %%edi; popq %%rbx"
#endif
: "=a" (output[0]), "=D" (output[1]), "=c" (output[2]), "=d" (output[3])
: "a" (input)
);
}
// put the previous SIGILL disposition back on both paths
signal(SIGILL, oldHandler);
return result;
#endif
}
#endif
/*
 * Checks whether an SSE2 instruction can actually be executed, rather than
 * relying on the CPUID feature bit alone.
 *
 * Returns 1 when the probe instruction ran, 0 when it raised an exception
 * (MS-style build) or SIGILL (other builds), or when the temporary SIGILL
 * handler could not be installed. 64-bit builds return 1 without probing.
 */
static int TrySSE2()
{
#if CRYPTOPP_BOOL_X64
return 1;
#elif defined(CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY)
// structured exception handling turns a faulting instruction into "return 0"
__try
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
AS2(por xmm0, xmm0) // executing SSE2 instruction
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
__m128i x = _mm_setzero_si128();
return _mm_cvtsi128_si32(x) == 0;
#endif
}
__except (1)
{
return 0;
}
return 1;
#else
// temporarily route SIGILL to a handler that longjmp()s back to the setjmp() below
SigHandler oldHandler = signal(SIGILL, SigIllHandlerSSE2);
if (oldHandler == SIG_ERR)
return 0;
int result = 1;
// setjmp() returns non-zero only when SigIllHandlerSSE2 jumped back here
if (setjmp(s_jmpNoSSE2))
result = 0;
else
{
#if CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE
__asm __volatile ("por %xmm0, %xmm0");
#elif CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
__m128i x = _mm_setzero_si128();
result = _mm_cvtsi128_si32(x) == 0;
#endif
}
// put the previous SIGILL disposition back on both paths
signal(SIGILL, oldHandler);
return result;
#endif
}
int g_x86DetectionDone = 0;
int g_hasISSE = 0, g_hasSSE2 = 0, g_hasSSSE3 = 0, g_hasMMX = 0, g_hasAESNI = 0, g_hasCLMUL = 0, g_isP4 = 0;
uint32 g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;
/*
 * Queries CPUID and fills in the feature globals (g_hasMMX, g_hasISSE,
 * g_hasSSE2, g_hasSSSE3, g_hasAESNI, g_hasCLMUL, g_isP4, g_cacheLineSize),
 * then sets g_x86DetectionDone.
 *
 * If CPUID leaf 0 or leaf 1 cannot be read, the function returns early and
 * leaves every global, including g_x86DetectionDone, untouched.
 *
 * Every optional CPUID query is checked for success, and the extended leaves
 * are only used after leaf 0x80000000 reports that they exist, so no
 * uninitialised register dump is ever interpreted.
 */
void DetectX86Features()
{
	uint32 cpuid[4] = {0}, cpuid1[4] = {0}, tmp;

	/* leaf 0: vendor string, leaf 1: feature flags */
	if (!CpuId(0, cpuid))
		return;
	if (!CpuId(1, cpuid1))
		return;

	/* leaf 1 EDX flags */
	g_hasMMX = (cpuid1[3] & (1 << 23)) != 0;
	if ((cpuid1[3] & (1 << 26)) != 0)
		g_hasSSE2 = TrySSE2();	/* flag alone is not trusted: actually run an SSE2 instruction */

	/* leaf 1 ECX flags; only honoured when SSE2 itself is usable */
	g_hasSSSE3 = g_hasSSE2 && (cpuid1[2] & (1 << 9));
	g_hasAESNI = g_hasSSE2 && (cpuid1[2] & (1 << 25));
	g_hasCLMUL = g_hasSSE2 && (cpuid1[2] & (1 << 1));

	if ((cpuid1[3] & (1 << 25)) != 0)
		g_hasISSE = 1;
	else
	{
		/* fall back to the flag in extended leaf 0x80000001, if that leaf exists */
		uint32 cpuid2[4] = {0};

		if (CpuId(0x80000000, cpuid2)
			&& cpuid2[0] >= 0x80000001
			&& CpuId(0x80000001, cpuid2))
		{
			g_hasISSE = (cpuid2[3] & (1 << 22)) != 0;
		}
	}

	/* leaf 0 returns the vendor string in EBX, EDX, ECX order: swap the last
	   two slots so that cpuid[1..3] holds the 12 characters contiguously */
	tmp = cpuid[2];
	cpuid[2] = cpuid[3];
	cpuid[3] = tmp;

	if (memcmp(cpuid+1, "GenuineIntel", 12) == 0)
	{
		g_isP4 = ((cpuid1[0] >> 8) & 0xf) == 0xf;
		g_cacheLineSize = 8 * GETBYTE(cpuid1[1], 1);
	}
	else if (memcmp(cpuid+1, "AuthenticAMD", 12) == 0)
	{
		/* only query leaf 0x80000005 when the CPU reports that it exists */
		uint32 ext[4] = {0};

		if (CpuId(0x80000000, ext)
			&& ext[0] >= 0x80000005
			&& CpuId(0x80000005, ext))
		{
			g_cacheLineSize = GETBYTE(ext[2], 0);
		}
	}

	/* never leave a zero line size behind: use the compile-time default */
	if (!g_cacheLineSize)
		g_cacheLineSize = CRYPTOPP_L1_CACHE_LINE_SIZE;

	g_x86DetectionDone = 1;
}
#endif

258
src/Crypto/cpu.h Normal file
View File

@ -0,0 +1,258 @@
#ifndef CRYPTOPP_CPU_H
#define CRYPTOPP_CPU_H
#include "Common/Tcdefs.h"
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define CRYPTOPP_X86_ASM_AVAILABLE
#define CRYPTOPP_BOOL_X64 1
#define CRYPTOPP_BOOL_SSE2_ASM_AVAILABLE 1
#else
#include "config.h"
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
#include <emmintrin.h>
#endif
#if CRYPTOPP_BOOL_AESNI_INTRINSICS_AVAILABLE
#if defined(__SSSE3__) || defined(__INTEL_COMPILER)
#ifdef TC_WINDOWS_DRIVER
extern __m128i _mm_shuffle_epi8 (__m128i a, __m128i b);
#else
#include <tmmintrin.h>
#endif
#endif
#if defined(__SSE4_1__) || defined(__INTEL_COMPILER)
#ifdef TC_WINDOWS_DRIVER
extern int _mm_extract_epi32(__m128i src, const int ndx);
extern __m128i _mm_insert_epi32(__m128i dst, int s, const int ndx);
#else
#include <smmintrin.h>
#endif
#endif
#if (defined(__AES__) && defined(__PCLMUL__)) || defined(__INTEL_COMPILER)
#ifdef TC_WINDOWS_DRIVER
extern __m128i _mm_clmulepi64_si128(__m128i v1, __m128i v2,
const int imm8);
extern __m128i _mm_aeskeygenassist_si128(__m128i ckey, const int rcon);
extern __m128i _mm_aesimc_si128(__m128i v);
extern __m128i _mm_aesenc_si128(__m128i v, __m128i rkey);
extern __m128i _mm_aesenclast_si128(__m128i v, __m128i rkey);
extern __m128i _mm_aesdec_si128(__m128i v, __m128i rkey);
extern __m128i _mm_aesdeclast_si128(__m128i v, __m128i rkey);
#else
#include <wmmintrin.h>
#endif
#endif
#endif
// Run-time CPU feature interface.
// On x86/x64 builds CRYPTOPP_CPUID_AVAILABLE is defined and the Has*() /
// IsP4() / GetCacheLineSize() macros read globals declared below; code should
// go through the macros rather than touch the globals. On every other
// architecture only GetCacheLineSize() exists and it yields the compile-time
// constant CRYPTOPP_L1_CACHE_LINE_SIZE.
#if CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X64
#define CRYPTOPP_CPUID_AVAILABLE
#if defined(__cplusplus)
extern "C" {
#endif
// these should not be used directly
extern int g_x86DetectionDone;
extern int g_hasSSSE3;
extern int g_hasAESNI;
extern int g_hasCLMUL;
extern int g_isP4;
extern uint32 g_cacheLineSize;
void DetectX86Features(); // must be called at the start of the program/driver
int CpuId(uint32 input, uint32 *output);
#if CRYPTOPP_BOOL_X64
// 64-bit builds treat SSE2, SSE and MMX as always present: no global is consulted
#define HasSSE2() 1
#define HasISSE() 1
#define HasMMX() 1
#else
extern int g_hasSSE2;
extern int g_hasISSE;
extern int g_hasMMX;
#define HasSSE2() g_hasSSE2
#define HasISSE() g_hasISSE
#define HasMMX() g_hasMMX
#endif
// the accessors below read the globals directly and do not look at g_x86DetectionDone
#define HasSSSE3() g_hasSSSE3
#define HasAESNI() g_hasAESNI
#define HasCLMUL() g_hasCLMUL
#define IsP4() g_isP4
#define GetCacheLineSize() g_cacheLineSize
#if defined(__cplusplus)
}
#endif
#else
#define GetCacheLineSize() CRYPTOPP_L1_CACHE_LINE_SIZE
#endif
#endif
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define AS1(x) x*newline*
#define AS2(x, y) x, y*newline*
#define AS3(x, y, z) x, y, z*newline*
#define ASS(x, y, a, b, c, d) x, y, a*64+b*16+c*4+d*newline*
#define ASL(x) label##x:*newline*
#define ASJ(x, y, z) x label##y*newline*
#define ASC(x, y) x label##y*newline*
#define AS_HEX(y) 0##y##h
#elif defined(_MSC_VER) || defined(__BORLANDC__)
#define CRYPTOPP_MS_STYLE_INLINE_ASSEMBLY
#define AS1(x) __asm {x}
#define AS2(x, y) __asm {x, y}
#define AS3(x, y, z) __asm {x, y, z}
#define ASS(x, y, a, b, c, d) __asm {x, y, (a)*64+(b)*16+(c)*4+(d)}
#define ASL(x) __asm {label##x:}
#define ASJ(x, y, z) __asm {x label##y}
#define ASC(x, y) __asm {x label##y}
#define CRYPTOPP_NAKED __declspec(naked)
#define AS_HEX(y) 0x##y
#else
#define CRYPTOPP_GNU_STYLE_INLINE_ASSEMBLY
// define these in two steps to allow arguments to be expanded
#define GNU_AS1(x) #x ";"
#define GNU_AS2(x, y) #x ", " #y ";"
#define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
#define GNU_ASL(x) "\n" #x ":"
#define GNU_ASJ(x, y, z) #x " " #y #z ";"
#define AS1(x) GNU_AS1(x)
#define AS2(x, y) GNU_AS2(x, y)
#define AS3(x, y, z) GNU_AS3(x, y, z)
#define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
#define ASL(x) GNU_ASL(x)
#define ASJ(x, y, z) GNU_ASJ(x, y, z)
#define ASC(x, y) #x " " #y ";"
#define CRYPTOPP_NAKED
#define AS_HEX(y) 0x##y
#endif
#define IF0(y)
#define IF1(y) y
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define ASM_MOD(x, y) ((x) MOD (y))
#define XMMWORD_PTR XMMWORD PTR
#else
// GNU assembler doesn't seem to have mod operator
#define ASM_MOD(x, y) ((x)-((x)/(y))*(y))
// GAS 2.15 doesn't support XMMWORD PTR. it seems necessary only for MASM
#define XMMWORD_PTR
#endif
#if CRYPTOPP_BOOL_X86
#define AS_REG_1 ecx
#define AS_REG_2 edx
#define AS_REG_3 esi
#define AS_REG_4 edi
#define AS_REG_5 eax
#define AS_REG_6 ebx
#define AS_REG_7 ebp
#define AS_REG_1d ecx
#define AS_REG_2d edx
#define AS_REG_3d esi
#define AS_REG_4d edi
#define AS_REG_5d eax
#define AS_REG_6d ebx
#define AS_REG_7d ebp
#define WORD_SZ 4
#define WORD_REG(x) e##x
#define WORD_PTR DWORD PTR
#define AS_PUSH_IF86(x) AS1(push e##x)
#define AS_POP_IF86(x) AS1(pop e##x)
#define AS_JCXZ jecxz
#elif CRYPTOPP_BOOL_X64
#ifdef CRYPTOPP_GENERATE_X64_MASM
#define AS_REG_1 rcx
#define AS_REG_2 rdx
#define AS_REG_3 r8
#define AS_REG_4 r9
#define AS_REG_5 rax
#define AS_REG_6 r10
#define AS_REG_7 r11
#define AS_REG_1d ecx
#define AS_REG_2d edx
#define AS_REG_3d r8d
#define AS_REG_4d r9d
#define AS_REG_5d eax
#define AS_REG_6d r10d
#define AS_REG_7d r11d
#else
#define AS_REG_1 rdi
#define AS_REG_2 rsi
#define AS_REG_3 rdx
#define AS_REG_4 rcx
#define AS_REG_5 r8
#define AS_REG_6 r9
#define AS_REG_7 r10
#define AS_REG_1d edi
#define AS_REG_2d esi
#define AS_REG_3d edx
#define AS_REG_4d ecx
#define AS_REG_5d r8d
#define AS_REG_6d r9d
#define AS_REG_7d r10d
#endif
#define WORD_SZ 8
#define WORD_REG(x) r##x
#define WORD_PTR QWORD PTR
#define AS_PUSH_IF86(x)
#define AS_POP_IF86(x)
#define AS_JCXZ jrcxz
#endif
// helper macro for stream cipher output
#define AS_XMM_OUTPUT4(labelPrefix, inputPtr, outputPtr, x0, x1, x2, x3, t, p0, p1, p2, p3, increment)\
AS2( test inputPtr, inputPtr)\
ASC( jz, labelPrefix##3)\
AS2( test inputPtr, 15)\
ASC( jnz, labelPrefix##7)\
AS2( pxor xmm##x0, [inputPtr+p0*16])\
AS2( pxor xmm##x1, [inputPtr+p1*16])\
AS2( pxor xmm##x2, [inputPtr+p2*16])\
AS2( pxor xmm##x3, [inputPtr+p3*16])\
AS2( add inputPtr, increment*16)\
ASC( jmp, labelPrefix##3)\
ASL(labelPrefix##7)\
AS2( movdqu xmm##t, [inputPtr+p0*16])\
AS2( pxor xmm##x0, xmm##t)\
AS2( movdqu xmm##t, [inputPtr+p1*16])\
AS2( pxor xmm##x1, xmm##t)\
AS2( movdqu xmm##t, [inputPtr+p2*16])\
AS2( pxor xmm##x2, xmm##t)\
AS2( movdqu xmm##t, [inputPtr+p3*16])\
AS2( pxor xmm##x3, xmm##t)\
AS2( add inputPtr, increment*16)\
ASL(labelPrefix##3)\
AS2( test outputPtr, 15)\
ASC( jnz, labelPrefix##8)\
AS2( movdqa [outputPtr+p0*16], xmm##x0)\
AS2( movdqa [outputPtr+p1*16], xmm##x1)\
AS2( movdqa [outputPtr+p2*16], xmm##x2)\
AS2( movdqa [outputPtr+p3*16], xmm##x3)\
ASC( jmp, labelPrefix##9)\
ASL(labelPrefix##8)\
AS2( movdqu [outputPtr+p0*16], xmm##x0)\
AS2( movdqu [outputPtr+p1*16], xmm##x1)\
AS2( movdqu [outputPtr+p2*16], xmm##x2)\
AS2( movdqu [outputPtr+p3*16], xmm##x3)\
ASL(labelPrefix##9)\
AS2( add outputPtr, increment*16)
#endif

132
src/Crypto/misc.h Normal file
View File

@ -0,0 +1,132 @@
#ifndef CRYPTOPP_MISC_H
#define CRYPTOPP_MISC_H
#include "config.h"
#include <string.h> // for memcpy and memmove
// CRYPTOPP_FAST_ROTATE(x): non-zero for the word sizes x (in bits) selected
// by the compiler/target table below. ByteReverseWord32() in this header uses
// CRYPTOPP_FAST_ROTATE(32) to choose between its two portable byte-reversal
// formulas.
// NOTE(review): presumably "non-zero" means that a rotate of an x-bit word
// compiles to a single instruction - confirm against upstream Crypto++.
#ifdef _MSC_VER
#if _MSC_VER >= 1400
#ifndef TC_WINDOWS_DRIVER
// VC2005 workaround: disable declarations that conflict with winnt.h
#define _interlockedbittestandset CRYPTOPP_DISABLED_INTRINSIC_1
#define _interlockedbittestandreset CRYPTOPP_DISABLED_INTRINSIC_2
#define _interlockedbittestandset64 CRYPTOPP_DISABLED_INTRINSIC_3
#define _interlockedbittestandreset64 CRYPTOPP_DISABLED_INTRINSIC_4
#include <intrin.h>
#undef _interlockedbittestandset
#undef _interlockedbittestandreset
#undef _interlockedbittestandset64
#undef _interlockedbittestandreset64
#endif
#define CRYPTOPP_FAST_ROTATE(x) 1
#elif _MSC_VER >= 1300
// bitwise '|' on two 0/1 comparison results: same value as '||' here
#define CRYPTOPP_FAST_ROTATE(x) ((x) == 32 | (x) == 64)
#else
#define CRYPTOPP_FAST_ROTATE(x) ((x) == 32)
#endif
#elif (defined(__MWERKS__) && TARGET_CPU_PPC) || \
(defined(__GNUC__) && (defined(_ARCH_PWR2) || defined(_ARCH_PWR) || defined(_ARCH_PPC) || defined(_ARCH_PPC64) || defined(_ARCH_COM)))
#define CRYPTOPP_FAST_ROTATE(x) ((x) == 32)
#elif defined(__GNUC__) && (CRYPTOPP_BOOL_X64 || CRYPTOPP_BOOL_X86) // depend on GCC's peephole optimization to generate rotate instructions
#define CRYPTOPP_FAST_ROTATE(x) 1
#else
#define CRYPTOPP_FAST_ROTATE(x) 0
#endif
// 32-bit and 64-bit rotate helpers: rotrNN(x,n) rotates x right by n bits,
// rotlNN(x,n) rotates it left. x must be an unsigned value of exactly NN bits.
#if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
// Intel C++ Compiler 10.0 calls a function instead of using the rotate instruction when using these intrinsics
#define rotr32(x,n) _rotr(x, n)
#define rotl32(x,n) _rotl(x, n)
#define rotr64(x,n) _rotr64(x, n)
#define rotl64(x,n) _rotl64(x, n)
#else
// Portable fallback. Both arguments are fully parenthesised so that an
// expression such as "a + b" can be passed as the count, and the shift counts
// are masked to the word size so that a count of 0 (or a multiple of the word
// size) never produces a full-width shift, which would be undefined behaviour.
#define rotr32(x,n) (((x) >> ((n) & 31)) | ((x) << ((32 - (n)) & 31)))
#define rotl32(x,n) (((x) << ((n) & 31)) | ((x) >> ((32 - (n)) & 31)))
#define rotr64(x,n) (((x) >> ((n) & 63)) | ((x) << ((64 - (n)) & 63)))
#define rotl64(x,n) (((x) << ((n) & 63)) | ((x) >> ((64 - (n)) & 63)))
#endif
// bswap_32(x) / bswap_64(x): reverse the byte order of a 32-bit / 64-bit value.
// Linux/GCC builds take them from <byteswap.h>, MSVC 7.0+ maps them onto the
// _byteswap_* intrinsics, and every other toolchain uses the portable macros
// below (these evaluate x more than once, so x must have no side effects).
#if defined(__GNUC__) && defined(__linux__)
#define CRYPTOPP_BYTESWAP_AVAILABLE
#include <byteswap.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1300
#define bswap_32(x) _byteswap_ulong(x)
#define bswap_64(x) _byteswap_uint64(x)
#else
// CRYPTOPP_FAST_ROTATE is a function-like macro, so its value has to be tested
// with #if (an #ifdef only checks that the name is defined and ignores "(32)")
#if CRYPTOPP_FAST_ROTATE(32)
#define bswap_32(x) ((rotr32((x), 8U) & 0xff00ff00) | (rotl32((x), 8U) & 0x00ff00ff))
#else
#define bswap_32(x) (rotl32((((x) & 0xFF00FF00) >> 8) | (((x) & 0x00FF00FF) << 8), 16U))
#endif
// 64-bit reversal in three steps: swap the bytes inside each 16-bit group,
// swap the 16-bit groups inside each 32-bit half, then swap the two halves
#define CRYPTOPP_SWAP_BYTES_IN_WORD16(x) ((((x) & LL(0xFF00FF00FF00FF00)) >> 8) | (((x) & LL(0x00FF00FF00FF00FF)) << 8))
#define CRYPTOPP_SWAP_WORD16_IN_WORD32(x) ((((x) & LL(0xFFFF0000FFFF0000)) >> 16) | (((x) & LL(0x0000FFFF0000FFFF)) << 16))
#define bswap_64(x) rotl64(CRYPTOPP_SWAP_WORD16_IN_WORD32(CRYPTOPP_SWAP_BYTES_IN_WORD16(x)), 32U)
#endif
/* Returns value with its four bytes in reverse order, using the cheapest
   mechanism the toolchain offers (BSWAP instruction, library/intrinsic byte
   swap, or a portable mask-and-rotate sequence). */
VC_INLINE uint32 ByteReverseWord32 (uint32 value)
{
#if defined(__GNUC__) && defined(CRYPTOPP_X86_ASM_AVAILABLE)
	/* the "0" constraint ties input and output to the same register */
	__asm__ ("bswap %0" : "=r" (value) : "0" (value));
	return value;
#elif defined(CRYPTOPP_BYTESWAP_AVAILABLE)
	return bswap_32(value);
#elif defined(__MWERKS__) && TARGET_CPU_PPC
	return (uint32)__lwbrx(&value,0);
#elif _MSC_VER >= 1400 || (_MSC_VER >= 1300 && !defined(_DLL))
	return _byteswap_ulong(value);
#elif CRYPTOPP_FAST_ROTATE(32)
	{
		/* 5 instructions with rotate instruction, 9 without */
		const uint32 upperOfEachPair = rotr32(value, 8U) & 0xff00ff00;
		const uint32 lowerOfEachPair = rotl32(value, 8U) & 0x00ff00ff;
		return upperOfEachPair | lowerOfEachPair;
	}
#else
	{
		/* 6 instructions with rotate instruction, 8 without */
		const uint32 bytesSwappedInPairs = ((value & 0xFF00FF00) >> 8) | ((value & 0x00FF00FF) << 8);
		return rotl32(bytesSwappedInPairs, 16U);
	}
#endif
}
/* Returns value with its eight bytes in reverse order, using the cheapest
   mechanism the toolchain offers (64-bit BSWAP instruction, library/intrinsic
   byte swap, or a portable three-step mask-and-shift sequence). */
VC_INLINE uint64 ByteReverseWord64(uint64 value)
{
#if defined(__GNUC__) && defined(CRYPTOPP_X86_ASM_AVAILABLE) && defined(__x86_64__)
	/* the "0" constraint ties input and output to the same register */
	__asm__ ("bswap %0" : "=r" (value) : "0" (value));
	return value;
#elif defined(CRYPTOPP_BYTESWAP_AVAILABLE)
	return bswap_64(value);
#elif defined(_MSC_VER) && _MSC_VER >= 1300
	return _byteswap_uint64(value);
#else
	{
		/* step 1: swap the two bytes of every 16-bit group */
		const uint64 bytesSwapped = ((value & LL(0xFF00FF00FF00FF00)) >> 8) | ((value & LL(0x00FF00FF00FF00FF)) << 8);
		/* step 2: swap the two 16-bit groups of every 32-bit half */
		const uint64 pairsSwapped = ((bytesSwapped & LL(0xFFFF0000FFFF0000)) >> 16) | ((bytesSwapped & LL(0x0000FFFF0000FFFF)) << 16);
		/* step 3: swap the 32-bit halves */
		return rotl64(pairsSwapped, 32U);
	}
#endif
}
/* Byte-reverses byteCount / sizeof(uint64) consecutive 64-bit words from in
   into out, front to back; any trailing bytes that do not fill a whole word
   are left alone. */
VC_INLINE void CorrectEndianess(uint64 *out, const uint64 *in, size_t byteCount)
{
	const size_t wordCount = byteCount / sizeof(uint64);
	size_t idx = 0;

	while (idx < wordCount)
	{
		out[idx] = ByteReverseWord64(in[idx]);
		++idx;
	}
}
// GetAlignmentOf(T): alignment, in bytes, that pointers to T are required to
// have. It is 1 when the target tolerates unaligned access; otherwise it comes
// from the compiler's alignof operator, with sizeof(T) as the last-resort guess.
#ifdef CRYPTOPP_ALLOW_UNALIGNED_DATA_ACCESS
#define GetAlignmentOf(T) 1
#elif (_MSC_VER >= 1300)
#define GetAlignmentOf(T) __alignof(T)
#elif defined(__GNUC__)
#define GetAlignmentOf(T) __alignof__(T)
#else
#define GetAlignmentOf(T) sizeof(T)
#endif
// IsPowerOf2(n): true when n is positive and has exactly one bit set.
#define IsPowerOf2(n) (((n) > 0) && (((n) & ((n)-1)) == 0))
// ModPowerOf2(a,b): a modulo b, valid only when b is a power of two.
#define ModPowerOf2(a,b) ((a) & ((b)-1))
// IsAlignedOn(p,alignment): true when the address held by p is a multiple of
// alignment. Both arguments are parenthesised before use: the cast has to
// apply to the whole pointer expression (for "buf + 4" the address of the
// fifth element must be tested, not the address of buf plus 4 bytes).
#define IsAlignedOn(p,alignment) (((alignment)==1) || (IsPowerOf2(alignment) ? ModPowerOf2((size_t)(p), (alignment)) == 0 : (size_t)(p) % (alignment) == 0))
// NOTE(review): despite its name this tests the alignment required for uint64
// (which is 1, i.e. always true, when unaligned access is allowed), not a
// 16-byte boundary - confirm that this is what the callers expect.
#define IsAligned16(p) IsAlignedOn(p, GetAlignmentOf(uint64))
#endif

View File

@ -16,6 +16,7 @@
#include "Crypto.h"
#include "Fat.h"
#include "Tests.h"
#include "cpu.h"
#include "Apidrvr.h"
#include "Boot/Windows/BootDefs.h"
@ -72,6 +73,8 @@ NTSTATUS DriverEntry (PDRIVER_OBJECT DriverObject, PUNICODE_STRING RegistryPath)
Dump ("DriverEntry " TC_APP_NAME " " VERSION_STRING "\n");
DetectX86Features ();
PsGetVersion (&OsMajorVersion, &OsMinorVersion, NULL, NULL);
// Load dump filter if the main driver is already loaded

View File

@ -15,6 +15,7 @@
code distribution packages. */
#include "Tcdefs.h"
#include "cpu.h"
#include <time.h>
#include <math.h>
@ -991,6 +992,8 @@ int WINAPI wWinMain (HINSTANCE hInstance, HINSTANCE hPrevInstance, wchar_t *lpsz
/* application title */
lpszTitle = L"VeraCrypt Expander";
DetectX86Features ();
status = DriverAttach ();
if (status != 0)
{

Binary file not shown.

View File

@ -15,6 +15,7 @@
#include <typeinfo>
#include <wx/apptrait.h>
#include <wx/cmdline.h>
#include "Crypto/cpu.h"
#include "Platform/PlatformTest.h"
#ifdef TC_UNIX
#include <errno.h>
@ -504,6 +505,9 @@ namespace VeraCrypt
SetAppName (Application::GetName());
SetClassName (Application::GetName());
#ifdef CRYPTOPP_CPUID_AVAILABLE
DetectX86Features ();
#endif
LangString.Init();
Core->Init();
@ -834,10 +838,8 @@ namespace VeraCrypt
#else
// MIME handler for directory seems to be unavailable through wxWidgets
wxString desktop = GetTraits()->GetDesktopEnvironment();
bool xdgOpenPresent = wxFileName::IsFileExecutable (wxT("/usr/bin/xdg-open"));
bool nautilusPresent = wxFileName::IsFileExecutable (wxT("/usr/bin/nautilus"));
if (desktop == L"GNOME" || (desktop.empty() && !xdgOpenPresent && nautilusPresent))
if (desktop == L"GNOME")
{
args.push_back ("--no-default-window");
args.push_back ("--no-desktop");
@ -870,7 +872,7 @@ namespace VeraCrypt
catch (exception &e) { ShowError (e); }
}
}
else if (xdgOpenPresent)
else if (wxFileName::IsFileExecutable (wxT("/usr/bin/xdg-open")))
{
// Fallback on the standard xdg-open command
// which is not always available by default

View File

@ -12,6 +12,7 @@
code distribution packages. */
#include "Tcdefs.h"
#include "cpu.h"
#include <time.h>
#include <math.h>
@ -8648,6 +8649,8 @@ int WINAPI wWinMain (HINSTANCE hInstance, HINSTANCE hPrevInstance, wchar_t *lpsz
VirtualLock (&defaultMountOptions, sizeof (defaultMountOptions));
VirtualLock (&szFileName, sizeof(szFileName));
DetectX86Features ();
try
{
BootEncObj = new BootEncryption (NULL);

View File

@ -45,6 +45,7 @@ endif
OBJS += ../Crypto/Aeskey.o
OBJS += ../Crypto/Aestab.o
OBJS += ../Crypto/cpu.o
OBJS += ../Crypto/Rmd160.o
OBJS += ../Crypto/Serpent.o
OBJS += ../Crypto/Sha2.o