220 lines
6.1 KiB
C
220 lines
6.1 KiB
C
/*
This code is written by kerukuro for cppcrypto library (http://cppcrypto.sourceforge.net/)
and released into public domain.
*/
|
|
|
|
/* adapted for VeraCrypt */
|
|
|
|
#include "chacha256.h"
|
|
#include "cpu.h"
|
|
#include "misc.h"
|
|
|
|
|
|
|
|
/* Local spellings for the 32-bit rotate helpers rotr32() / rotl32(). */
#define rotater32(x, n) rotr32((x), (n))
#define rotatel32(x, n) rotl32((x), (n))

#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE
/*
 * Vectorized encryption routine; only declared here, its definition is not
 * part of this file. It is called from do_encrypt() with the byte count, the
 * 16-word state, the input and output pointers, the context's block buffer
 * and the internal round count.
 */
void chacha_ECRYPT_encrypt_bytes(size_t bytes,
                                 uint32* x,
                                 const unsigned char* m,
                                 unsigned char* out,
                                 unsigned char* output,
                                 unsigned int r);
#endif
|
|
|
|
/*
 * XOR two 64-byte (512-bit) blocks: out[k] = in[k] ^ prev[k] for k in 0..63.
 *
 * When the SSE2 path is compiled in and HasSSE2() reports support, the block
 * is processed as four unaligned 128-bit lanes; otherwise a plain byte loop
 * is used.
 */
static VC_INLINE void xor_block_512(const unsigned char* in, const unsigned char* prev, unsigned char* out)
{
#if CRYPTOPP_BOOL_SSE2_INTRINSICS_AVAILABLE && !defined(_UEFI) && (!defined (TC_WINDOWS_DRIVER) || (!defined (DEBUG) && defined (_WIN64)))
    if (HasSSE2())
    {
        size_t half;

        /* Two passes of 32 bytes; within a pass both lanes are loaded before either is stored. */
        for (half = 0; half < 64; half += 32)
        {
            const __m128i inLo = _mm_loadu_si128((const __m128i*) (in + half));
            const __m128i prevLo = _mm_loadu_si128((const __m128i*) (prev + half));
            const __m128i inHi = _mm_loadu_si128((const __m128i*) (in + half + 16));
            const __m128i prevHi = _mm_loadu_si128((const __m128i*) (prev + half + 16));

            _mm_storeu_si128((__m128i*) (out + half), _mm_xor_si128(inLo, prevLo));
            _mm_storeu_si128((__m128i*) (out + half + 16), _mm_xor_si128(inHi, prevHi));
        }
    }
    else
#endif
    {
        /* Portable fallback: one byte at a time. */
        size_t n;

        for (n = 0; n < 64; ++n)
        {
            out[n] = in[n] ^ prev[n];
        }
    }
}
|
|
|
|
/*
 * One quarter round applied in place to the four state words x[a], x[b],
 * x[c], x[d]: add / xor / rotate-left by 16, 12, 8 and 7 bits.
 */
static VC_INLINE void chacha_quarter_round(uint32* x, int a, int b, int c, int d)
{
    x[a] += x[b];
    x[d] = rotatel32(x[d] ^ x[a], 16);
    x[c] += x[d];
    x[b] = rotatel32(x[b] ^ x[c], 12);
    x[a] += x[b];
    x[d] = rotatel32(x[d] ^ x[a], 8);
    x[c] += x[d];
    x[b] = rotatel32(x[b] ^ x[c], 7);
}

/*
 * Permute the 16-word state x in place.
 *
 * x - 16-word state, modified in place.
 * r - number of iterations; each iteration runs eight quarter rounds
 *     (four over the index columns, then four over the diagonals).
 *     Nothing happens when r <= 0.
 */
static VC_INLINE void chacha_core(uint32* x, int r)
{
    int remaining = r;

    while (remaining > 0)
    {
        /* Columns: (n, n+4, n+8, n+12). */
        chacha_quarter_round(x, 0, 4, 8, 12);
        chacha_quarter_round(x, 1, 5, 9, 13);
        chacha_quarter_round(x, 2, 6, 10, 14);
        chacha_quarter_round(x, 3, 7, 11, 15);

        /* Diagonals. */
        chacha_quarter_round(x, 0, 5, 10, 15);
        chacha_quarter_round(x, 1, 6, 11, 12);
        chacha_quarter_round(x, 2, 7, 8, 13);
        chacha_quarter_round(x, 3, 4, 9, 14);

        --remaining;
    }
}
|
|
|
|
/*
 * Produce one 16-word output block from the 16-word state `in`.
 *
 * A private copy of `in` is permuted with chacha_core(copy, r) and the
 * original input words are then added back word by word into `out`.
 * `in` itself is not modified.
 */
static VC_INLINE void chacha_hash(const uint32* in, uint32* out, int r)
{
    uint32 state[16];
    int word = 0;

    memcpy(state, in, 64);
    chacha_core(state, r);

    /* Feed-forward: permuted word + original word. */
    while (word < 16)
    {
        out[word] = state[word] + in[word];
        ++word;
    }
}
|
|
|
|
/*
 * Generate the block for the current state into `block`, then advance the
 * block counter held in input[12] (low word) and input[13] (high word).
 */
static VC_INLINE void incrementSalsaCounter(uint32* input, uint32* block, int r)
{
    chacha_hash(input, block, r);

    input[12] += 1;
    if (input[12] == 0)
    {
        /* Low word wrapped around: carry into the high word. */
        input[13] += 1;
    }
}
|
|
|
|
/*
 * XOR `len` bytes of `in` with keystream and store the result in `out`.
 *
 * in, out - input and output buffers, each at least `len` bytes.
 * len     - number of bytes to process; 0 is a no-op.
 * r       - iteration count forwarded to the block function.
 * posPtr  - in/out: offset inside `block` of the first keystream byte that
 *           has not been used yet; 0 means no buffered keystream is left.
 * input   - 16-word cipher state; the scalar path advances the counter in
 *           words 12/13 once per generated block.
 * block   - 64-byte buffer holding the most recently generated keystream
 *           block, so a later call can continue where this one stopped.
 *
 * The function first drains keystream left over from a previous call, then
 * processes whole 64-byte blocks, then a trailing partial block whose unused
 * keystream stays in `block` for the next call.
 */
static VC_INLINE void do_encrypt(const unsigned char* in, size_t len, unsigned char* out, int r, size_t* posPtr, uint32* input, uint32* block)
{
    size_t i = 0, pos = *posPtr;

    if (pos)
    {
        /*
         * Use up buffered keystream first. The loop is bounded by the number
         * of bytes written (i), not by pos: pos is an offset into the
         * keystream block and is unrelated to the request length. Bounding by
         * pos would skip or prematurely discard buffered keystream for short
         * requests and break continuity between calls.
         */
        while (i < len && pos < 64)
        {
            out[i] = in[i] ^ ((unsigned char*)block)[pos++];
            ++i;
        }
        len -= i;
    }

    /*
     * More data to process means the buffered block is exhausted (or there
     * was none); an exactly exhausted block is also normalized to "empty" so
     * the stored position always stays in the range 0..63.
     */
    if (len || pos >= 64)
        pos = 0;

#if CRYPTOPP_SSSE3_AVAILABLE && !defined(_UEFI) && (!defined (TC_WINDOWS_DRIVER) || (!defined (DEBUG) && defined (_WIN64)))
    if (HasSSSE3())
    {
        size_t fullblocks = len - len % 64;
        if (fullblocks)
        {
            chacha_ECRYPT_encrypt_bytes(fullblocks, input, in + i, out + i, (unsigned char*)block, r);
            i += fullblocks;
            len -= fullblocks;
        }
        if (len)
        {
            /*
             * Trailing partial block.
             * NOTE(review): this relies on chacha_ECRYPT_encrypt_bytes leaving
             * the keystream of the partial block in `block` - confirm against
             * its implementation.
             */
            chacha_ECRYPT_encrypt_bytes(len, input, in + i, out + i, (unsigned char*)block, r);
            pos = len;
        }
        *posPtr = pos;
        return;
    }
#endif

    /* Portable path: one keystream block per iteration. */
    for (; len; len -= VC_MIN(64, len))
    {
        incrementSalsaCounter(input, block, r);
        if (len >= 64)
        {
            xor_block_512(in + i, (unsigned char*)block, out + i);
            i += 64;
        }
        else
        {
            /* Partial block: pos is 0 here and ends up as the number of keystream bytes used. */
            for (; pos < len; pos++, i++)
                out[i] = in[i] ^ ((unsigned char*)block)[pos];
        }
    }
    *posPtr = pos;
}
|
|
|
|
/*
 * Initialize a ChaCha256 context.
 *
 * ctx    - context to set up.
 * key    - 32 bytes, copied into state words 4..11.
 * iv     - 8 bytes, copied into state words 14..15.
 * rounds - total round count; half of it is stored because the core loop
 *          performs two rounds (eight quarter rounds) per iteration.
 *
 * The block counter (words 12/13) and the keystream position start at zero.
 */
void ChaCha256Init(ChaCha256Ctx* ctx, const unsigned char* key, const unsigned char* iv, int rounds)
{
    uint32* state = ctx->input_;

    /* Constants: the ASCII string "expand 32-byte k" as four little-endian words. */
    state[0] = 0x61707865;
    state[1] = 0x3320646E;
    state[2] = 0x79622D32;
    state[3] = 0x6B206574;

    /* Key material. */
    memcpy(state + 4, key, 32);

    /* 64-bit block counter, starting at zero. */
    state[12] = 0;
    state[13] = 0;

    /* Initialization vector. */
    memcpy(state + 14, iv, 8);

    ctx->pos = 0;
    ctx->internalRounds = rounds / 2;
}
|
|
|
|
/*
 * Process `len` bytes from `in` into `out` using the state kept in `ctx`.
 *
 * Thin wrapper around do_encrypt(): the round count, keystream position,
 * cipher state and block buffer all come from the context.
 */
void ChaCha256Encrypt(ChaCha256Ctx* ctx, const unsigned char* in, size_t len, unsigned char* out)
{
    size_t* keystreamPos = &ctx->pos;

    do_encrypt(in,
               len,
               out,
               ctx->internalRounds,
               keystreamPos,
               ctx->input_,
               ctx->block_);
}
|