2015-07-08 02:53:39 +00:00
|
|
|
#include "aes.hpp"
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <cpuid.h>
|
|
|
|
|
|
|
|
namespace Athena
|
|
|
|
{
|
|
|
|
|
|
|
|
/*
 * Rotates an 8-bit value one bit to the left.
 * The operand is masked and the result narrowed to uint8_t so the macro
 * yields a true 8-bit rotation even when it is used inside a wider
 * expression (previously the carry bit leaked out as bit 8 unless the
 * caller stored the result back into a uint8_t).
 */
#define ROTL(x) ((uint8_t)((((x) & 0xFF) >> 7) | ((x) << 1)))

/*
 * Rotate a 32-bit word left by 1, 2 or 3 bytes.
 * The operand is converted to uint32_t first so narrower or signed
 * arguments cannot be shifted in a promoted (signed int) type.
 */
#define ROTL8(x)  ((((uint32_t)(x)) << 8)  | (((uint32_t)(x)) >> 24))
#define ROTL16(x) ((((uint32_t)(x)) << 16) | (((uint32_t)(x)) >> 16))
#define ROTL24(x) ((((uint32_t)(x)) << 24) | (((uint32_t)(x)) >> 8))

/* Fixed Data */

/* Inverse MixColumns coefficients, consumed by InvMixCol() and gentables() */
static const uint8_t InCo[4] = {0xB, 0xD, 0x9, 0xE};
|
|
|
|
|
|
|
|
/*
 * Assemble four consecutive bytes into one 32-bit word.
 * b[0] becomes the least significant byte, b[3] the most significant.
 */
static inline uint32_t pack(const uint8_t* b)
{
    uint32_t word = 0;

    /* feed the bytes in from most to least significant */
    for (int idx = 3; idx >= 0; --idx)
        word = (word << 8) | (uint32_t)b[idx];

    return word;
}
|
|
|
|
|
|
|
|
/*
 * Split a 32-bit word into four bytes.
 * The least significant byte of a is stored in b[0], the most
 * significant in b[3] (the inverse of pack()).
 */
static inline void unpack(uint32_t a, uint8_t* b)
{
    for (int idx = 0; idx < 4; ++idx)
    {
        b[idx] = (uint8_t)(a & 0xFF);
        a >>= 8;
    }
}
|
|
|
|
|
|
|
|
/*
 * Shift a left by one bit and, when the top bit was set, fold the
 * overflow back in by XORing with 0x1B.
 */
static inline uint8_t xtime(uint8_t a)
{
    const uint8_t reduction = (a & 0x80) ? 0x1B : 0x00;
    const uint8_t shifted = (uint8_t)(a << 1);

    return (uint8_t)(shifted ^ reduction);
}
|
|
|
|
|
|
|
|
class SoftwareAES : public IAES
|
|
|
|
{
|
|
|
|
protected:
|
|
|
|
uint8_t fbsub[256];
|
|
|
|
uint8_t rbsub[256];
|
|
|
|
uint8_t ptab[256], ltab[256];
|
|
|
|
uint32_t ftable[256];
|
|
|
|
uint32_t rtable[256];
|
|
|
|
uint32_t rco[30];
|
|
|
|
|
|
|
|
/* Parameter-dependent data */
|
|
|
|
|
|
|
|
int Nk, Nb, Nr;
|
|
|
|
uint8_t fi[24], ri[24];
|
|
|
|
uint32_t fkey[120];
|
|
|
|
uint32_t rkey[120];
|
|
|
|
|
|
|
|
|
|
|
|
uint8_t bmul(uint8_t x, uint8_t y);
|
|
|
|
uint32_t SubByte(uint32_t a);
|
|
|
|
uint8_t product(uint32_t x, uint32_t y);
|
|
|
|
uint32_t InvMixCol(uint32_t x);
|
|
|
|
uint8_t ByteSub(uint8_t x);
|
|
|
|
void gentables(void);
|
|
|
|
void gkey(int nb, int nk, const uint8_t* key);
|
|
|
|
void _encrypt(uint8_t* buff);
|
|
|
|
void _decrypt(uint8_t* buff);
|
|
|
|
|
|
|
|
public:
|
|
|
|
void encrypt(const uint8_t* iv, const uint8_t* inbuf, uint8_t* outbuf, uint64_t len);
|
|
|
|
void decrypt(const uint8_t* iv, const uint8_t* inbuf, uint8_t* outbuf, uint64_t len);
|
|
|
|
void setKey(const uint8_t* key);
|
|
|
|
};
|
|
|
|
|
|
|
|
uint8_t SoftwareAES::bmul(uint8_t x, uint8_t y)
|
|
|
|
{
|
|
|
|
/* x.y= AntiLog(Log(x) + Log(y)) */
|
|
|
|
if (x && y) return ptab[(ltab[x] + ltab[y]) % 255];
|
|
|
|
else return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t SoftwareAES::SubByte(uint32_t a)
|
|
|
|
{
|
|
|
|
uint8_t b[4];
|
|
|
|
unpack(a, b);
|
|
|
|
b[0] = fbsub[b[0]];
|
|
|
|
b[1] = fbsub[b[1]];
|
|
|
|
b[2] = fbsub[b[2]];
|
|
|
|
b[3] = fbsub[b[3]];
|
|
|
|
return pack(b);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint8_t SoftwareAES::product(uint32_t x, uint32_t y)
|
|
|
|
{
|
|
|
|
/* dot product of two 4-byte arrays */
|
|
|
|
uint8_t xb[4], yb[4];
|
|
|
|
unpack(x, xb);
|
|
|
|
unpack(y, yb);
|
|
|
|
return bmul(xb[0], yb[0])^bmul(xb[1], yb[1])^bmul(xb[2], yb[2])^bmul(xb[3], yb[3]);
|
|
|
|
}
|
|
|
|
|
|
|
|
uint32_t SoftwareAES::InvMixCol(uint32_t x)
|
|
|
|
{
|
|
|
|
/* matrix Multiplication */
|
|
|
|
uint32_t y, m;
|
|
|
|
uint8_t b[4];
|
|
|
|
|
|
|
|
m = pack(InCo);
|
|
|
|
b[3] = product(m, x);
|
|
|
|
m = ROTL24(m);
|
|
|
|
b[2] = product(m, x);
|
|
|
|
m = ROTL24(m);
|
|
|
|
b[1] = product(m, x);
|
|
|
|
m = ROTL24(m);
|
|
|
|
b[0] = product(m, x);
|
|
|
|
y = pack(b);
|
|
|
|
return y;
|
|
|
|
}
|
|
|
|
|
|
|
|
uint8_t SoftwareAES::ByteSub(uint8_t x)
|
|
|
|
{
|
|
|
|
uint8_t y = ptab[255 - ltab[x]]; /* multiplicative inverse */
|
|
|
|
x = y;
|
|
|
|
x = ROTL(x);
|
|
|
|
y ^= x;
|
|
|
|
x = ROTL(x);
|
|
|
|
y ^= x;
|
|
|
|
x = ROTL(x);
|
|
|
|
y ^= x;
|
|
|
|
x = ROTL(x);
|
|
|
|
y ^= x;
|
|
|
|
y ^= 0x63;
|
|
|
|
return y;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SoftwareAES::gentables(void)
|
|
|
|
{
|
|
|
|
/* generate tables */
|
|
|
|
int i;
|
|
|
|
uint8_t y, b[4];
|
|
|
|
|
|
|
|
/* use 3 as primitive root to generate power and log tables */
|
|
|
|
|
|
|
|
ltab[0] = 0;
|
|
|
|
ptab[0] = 1;
|
|
|
|
ltab[1] = 0;
|
|
|
|
ptab[1] = 3;
|
|
|
|
ltab[3] = 1;
|
|
|
|
|
|
|
|
for (i = 2; i < 256; i++)
|
|
|
|
{
|
|
|
|
ptab[i] = ptab[i - 1] ^ xtime(ptab[i - 1]);
|
|
|
|
ltab[ptab[i]] = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* affine transformation:- each bit is xored with itself shifted one bit */
|
|
|
|
|
|
|
|
fbsub[0] = 0x63;
|
|
|
|
rbsub[0x63] = 0;
|
|
|
|
|
|
|
|
for (i = 1; i < 256; i++)
|
|
|
|
{
|
|
|
|
y = ByteSub((uint8_t)i);
|
|
|
|
fbsub[i] = y;
|
|
|
|
rbsub[y] = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0, y = 1; i < 30; i++)
|
|
|
|
{
|
|
|
|
rco[i] = y;
|
|
|
|
y = xtime(y);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* calculate forward and reverse tables */
|
|
|
|
for (i = 0; i < 256; i++)
|
|
|
|
{
|
|
|
|
y = fbsub[i];
|
|
|
|
b[3] = y ^ xtime(y);
|
|
|
|
b[2] = y;
|
|
|
|
b[1] = y;
|
|
|
|
b[0] = xtime(y);
|
|
|
|
ftable[i] = pack(b);
|
|
|
|
|
|
|
|
y = rbsub[i];
|
|
|
|
b[3] = bmul(InCo[0], y);
|
|
|
|
b[2] = bmul(InCo[1], y);
|
|
|
|
b[1] = bmul(InCo[2], y);
|
|
|
|
b[0] = bmul(InCo[3], y);
|
|
|
|
rtable[i] = pack(b);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void SoftwareAES::gkey(int nb, int nk, const uint8_t* key)
|
|
|
|
{
|
|
|
|
/* blocksize=32*nb bits. Key=32*nk bits */
|
|
|
|
/* currently nb,bk = 4, 6 or 8 */
|
|
|
|
/* key comes as 4*Nk bytes */
|
|
|
|
/* Key Scheduler. Create expanded encryption key */
|
|
|
|
int i, j, k, m, N;
|
|
|
|
int C1, C2, C3;
|
|
|
|
uint32_t CipherKey[8];
|
|
|
|
|
|
|
|
Nb = nb;
|
|
|
|
Nk = nk;
|
|
|
|
|
|
|
|
/* Nr is number of rounds */
|
|
|
|
if (Nb >= Nk) Nr = 6 + Nb;
|
|
|
|
else Nr = 6 + Nk;
|
|
|
|
|
|
|
|
C1 = 1;
|
|
|
|
|
|
|
|
if (Nb < 8) { C2 = 2; C3 = 3; }
|
|
|
|
else { C2 = 3; C3 = 4; }
|
|
|
|
|
|
|
|
/* pre-calculate forward and reverse increments */
|
|
|
|
for (m = j = 0; j < nb; j++, m += 3)
|
|
|
|
{
|
|
|
|
fi[m] = (j + C1) % nb;
|
|
|
|
fi[m + 1] = (j + C2) % nb;
|
|
|
|
fi[m + 2] = (j + C3) % nb;
|
|
|
|
ri[m] = (nb + j - C1) % nb;
|
|
|
|
ri[m + 1] = (nb + j - C2) % nb;
|
|
|
|
ri[m + 2] = (nb + j - C3) % nb;
|
|
|
|
}
|
|
|
|
|
|
|
|
N = Nb * (Nr + 1);
|
|
|
|
|
|
|
|
for (i = j = 0; i < Nk; i++, j += 4)
|
|
|
|
{
|
|
|
|
CipherKey[i] = pack(key + j);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < Nk; i++) fkey[i] = CipherKey[i];
|
|
|
|
|
|
|
|
for (j = Nk, k = 0; j < N; j += Nk, k++)
|
|
|
|
{
|
|
|
|
fkey[j] = fkey[j - Nk] ^ SubByte(ROTL24(fkey[j - 1]))^rco[k];
|
|
|
|
|
|
|
|
if (Nk <= 6)
|
|
|
|
{
|
|
|
|
for (i = 1; i < Nk && (i + j) < N; i++)
|
|
|
|
fkey[i + j] = fkey[i + j - Nk] ^ fkey[i + j - 1];
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
for (i = 1; i < 4 && (i + j) < N; i++)
|
|
|
|
fkey[i + j] = fkey[i + j - Nk] ^ fkey[i + j - 1];
|
|
|
|
|
|
|
|
if ((j + 4) < N) fkey[j + 4] = fkey[j + 4 - Nk] ^ SubByte(fkey[j + 3]);
|
|
|
|
|
|
|
|
for (i = 5; i < Nk && (i + j) < N; i++)
|
|
|
|
fkey[i + j] = fkey[i + j - Nk] ^ fkey[i + j - 1];
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/* now for the expanded decrypt key in reverse order */
|
|
|
|
|
|
|
|
for (j = 0; j < Nb; j++) rkey[j + N - Nb] = fkey[j];
|
|
|
|
|
|
|
|
for (i = Nb; i < N - Nb; i += Nb)
|
|
|
|
{
|
|
|
|
k = N - Nb - i;
|
|
|
|
|
|
|
|
for (j = 0; j < Nb; j++) rkey[k + j] = InvMixCol(fkey[i + j]);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (j = N - Nb; j < N; j++) rkey[j - N + Nb] = fkey[j];
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* There is an obvious time/space trade-off possible here. *
|
|
|
|
* Instead of just one ftable[], I could have 4, the other *
|
|
|
|
* 3 pre-rotated to save the ROTL8, ROTL16 and ROTL24 overhead */
|
|
|
|
|
|
|
|
void SoftwareAES::_encrypt(uint8_t* buff)
|
|
|
|
{
|
|
|
|
int i, j, k, m;
|
|
|
|
uint32_t a[8], b[8], *x, *y, *t;
|
|
|
|
|
|
|
|
for (i = j = 0; i < Nb; i++, j += 4)
|
|
|
|
{
|
|
|
|
a[i] = pack(buff + j);
|
|
|
|
a[i] ^= fkey[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
k = Nb;
|
|
|
|
x = a;
|
|
|
|
y = b;
|
|
|
|
|
|
|
|
/* State alternates between a and b */
|
|
|
|
for (i = 1; i < Nr; i++)
|
|
|
|
{
|
|
|
|
/* Nr is number of rounds. May be odd. */
|
|
|
|
|
|
|
|
/* if Nb is fixed - unroll this next
|
|
|
|
loop and hard-code in the values of fi[] */
|
|
|
|
|
|
|
|
for (m = j = 0; j < Nb; j++, m += 3)
|
|
|
|
{
|
|
|
|
/* deal with each 32-bit element of the State */
|
|
|
|
/* This is the time-critical bit */
|
|
|
|
y[j] = fkey[k++] ^ ftable[(uint8_t)x[j]] ^
|
|
|
|
ROTL8(ftable[(uint8_t)(x[fi[m]] >> 8)])^
|
|
|
|
ROTL16(ftable[(uint8_t)(x[fi[m + 1]] >> 16)])^
|
|
|
|
ROTL24(ftable[(uint8_t)(x[fi[m + 2]] >> 24)]);
|
|
|
|
}
|
|
|
|
|
|
|
|
t = x;
|
|
|
|
x = y;
|
|
|
|
y = t; /* swap pointers */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Last Round - unroll if possible */
|
|
|
|
for (m = j = 0; j < Nb; j++, m += 3)
|
|
|
|
{
|
|
|
|
y[j] = fkey[k++] ^ (uint32_t)fbsub[(uint8_t)x[j]] ^
|
|
|
|
ROTL8((uint32_t)fbsub[(uint8_t)(x[fi[m]] >> 8)])^
|
|
|
|
ROTL16((uint32_t)fbsub[(uint8_t)(x[fi[m + 1]] >> 16)])^
|
|
|
|
ROTL24((uint32_t)fbsub[(uint8_t)(x[fi[m + 2]] >> 24)]);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = j = 0; i < Nb; i++, j += 4)
|
|
|
|
{
|
|
|
|
unpack(y[i], (uint8_t*)&buff[j]);
|
|
|
|
x[i] = y[i] = 0; /* clean up stack */
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SoftwareAES::_decrypt(uint8_t* buff)
|
|
|
|
{
|
|
|
|
int i, j, k, m;
|
|
|
|
uint32_t a[8], b[8], *x, *y, *t;
|
|
|
|
|
|
|
|
for (i = j = 0; i < Nb; i++, j += 4)
|
|
|
|
{
|
|
|
|
a[i] = pack(buff + j);
|
|
|
|
a[i] ^= rkey[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
k = Nb;
|
|
|
|
x = a;
|
|
|
|
y = b;
|
|
|
|
|
|
|
|
/* State alternates between a and b */
|
|
|
|
for (i = 1; i < Nr; i++)
|
|
|
|
{
|
|
|
|
/* Nr is number of rounds. May be odd. */
|
|
|
|
|
|
|
|
/* if Nb is fixed - unroll this next
|
|
|
|
loop and hard-code in the values of ri[] */
|
|
|
|
|
|
|
|
for (m = j = 0; j < Nb; j++, m += 3)
|
|
|
|
{
|
|
|
|
/* This is the time-critical bit */
|
|
|
|
y[j] = rkey[k++] ^ rtable[(uint8_t)x[j]] ^
|
|
|
|
ROTL8(rtable[(uint8_t)(x[ri[m]] >> 8)])^
|
|
|
|
ROTL16(rtable[(uint8_t)(x[ri[m + 1]] >> 16)])^
|
|
|
|
ROTL24(rtable[(uint8_t)(x[ri[m + 2]] >> 24)]);
|
|
|
|
}
|
|
|
|
|
|
|
|
t = x;
|
|
|
|
x = y;
|
|
|
|
y = t; /* swap pointers */
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Last Round - unroll if possible */
|
|
|
|
for (m = j = 0; j < Nb; j++, m += 3)
|
|
|
|
{
|
|
|
|
y[j] = rkey[k++] ^ (uint32_t)rbsub[(uint8_t)x[j]] ^
|
|
|
|
ROTL8((uint32_t)rbsub[(uint8_t)(x[ri[m]] >> 8)])^
|
|
|
|
ROTL16((uint32_t)rbsub[(uint8_t)(x[ri[m + 1]] >> 16)])^
|
|
|
|
ROTL24((uint32_t)rbsub[(uint8_t)(x[ri[m + 2]] >> 24)]);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = j = 0; i < Nb; i++, j += 4)
|
|
|
|
{
|
|
|
|
unpack(y[i], (uint8_t*)&buff[j]);
|
|
|
|
x[i] = y[i] = 0; /* clean up stack */
|
|
|
|
}
|
|
|
|
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
void SoftwareAES::setKey(const uint8_t* key)
|
|
|
|
{
|
|
|
|
gentables();
|
|
|
|
gkey(4, 4, key);
|
|
|
|
}
|
|
|
|
|
|
|
|
// CBC mode decryption
|
2015-07-09 00:57:51 +00:00
|
|
|
void SoftwareAES::decrypt(const uint8_t* iv, const uint8_t* inbuf, uint8_t* outbuf, uint64_t len)
|
2015-07-08 02:53:39 +00:00
|
|
|
{
|
|
|
|
uint8_t block[16];
|
|
|
|
const uint8_t* ctext_ptr;
|
|
|
|
unsigned int blockno = 0, i;
|
|
|
|
|
|
|
|
//fprintf( stderr,"aes_decrypt(%p, %p, %p, %lld)\n", iv, inbuf, outbuf, len );
|
|
|
|
//printf("aes_decrypt(%p, %p, %p, %lld)\n", iv, inbuf, outbuf, len);
|
|
|
|
|
|
|
|
for (blockno = 0; blockno <= (len / sizeof(block)); blockno++)
|
|
|
|
{
|
|
|
|
unsigned int fraction;
|
|
|
|
|
|
|
|
if (blockno == (len / sizeof(block))) // last block
|
|
|
|
{
|
|
|
|
fraction = len % sizeof(block);
|
|
|
|
|
|
|
|
if (fraction == 0) break;
|
|
|
|
|
|
|
|
memset(block, 0, sizeof(block));
|
|
|
|
}
|
|
|
|
else fraction = 16;
|
|
|
|
|
|
|
|
// debug_printf("block %d: fraction = %d\n", blockno, fraction);
|
|
|
|
memcpy(block, inbuf + blockno * sizeof(block), fraction);
|
|
|
|
_decrypt(block);
|
|
|
|
|
|
|
|
if (blockno == 0) ctext_ptr = iv;
|
|
|
|
else ctext_ptr = (uint8_t*)(inbuf + (blockno - 1) * sizeof(block));
|
|
|
|
|
|
|
|
for (i = 0; i < fraction; i++)
|
|
|
|
outbuf[blockno * sizeof(block) + i] =
|
|
|
|
ctext_ptr[i] ^ block[i];
|
|
|
|
|
|
|
|
// debug_printf("Block %d output: ", blockno);
|
|
|
|
// hexdump(outbuf + blockno*sizeof(block), 16);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// CBC mode encryption
|
|
|
|
void SoftwareAES::encrypt(const uint8_t* iv, const uint8_t* inbuf, uint8_t* outbuf, uint64_t len)
|
|
|
|
{
|
|
|
|
uint8_t block[16];
|
|
|
|
uint8_t feedback[16];
|
|
|
|
memcpy(feedback, iv, 16);
|
|
|
|
unsigned int blockno = 0, i;
|
|
|
|
|
|
|
|
//printf("aes_decrypt(%p, %p, %p, %lld)\n", iv, inbuf, outbuf, len);
|
|
|
|
//fprintf( stderr,"aes_encrypt(%p, %p, %p, %lld)\n", iv, inbuf, outbuf, len);
|
|
|
|
|
|
|
|
for (blockno = 0; blockno <= (len / sizeof(block)); blockno++)
|
|
|
|
{
|
|
|
|
unsigned int fraction;
|
|
|
|
|
|
|
|
if (blockno == (len / sizeof(block))) // last block
|
|
|
|
{
|
|
|
|
fraction = len % sizeof(block);
|
|
|
|
|
|
|
|
if (fraction == 0) break;
|
|
|
|
|
|
|
|
memset(block, 0, sizeof(block));
|
|
|
|
}
|
|
|
|
else fraction = 16;
|
|
|
|
|
|
|
|
// debug_printf("block %d: fraction = %d\n", blockno, fraction);
|
|
|
|
memcpy(block, inbuf + blockno * sizeof(block), fraction);
|
|
|
|
|
|
|
|
for (i = 0; i < fraction; i++)
|
|
|
|
block[i] = inbuf[blockno * sizeof(block) + i] ^ feedback[i];
|
|
|
|
|
|
|
|
_encrypt(block);
|
|
|
|
memcpy(feedback, block, sizeof(block));
|
|
|
|
memcpy(outbuf + blockno * sizeof(block), block, sizeof(block));
|
|
|
|
// debug_printf("Block %d output: ", blockno);
|
|
|
|
// hexdump(outbuf + blockno*sizeof(block), 16);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#if __AES__
|
|
|
|
|
|
|
|
#include <wmmintrin.h>
|
|
|
|
|
|
|
|
class NiAES : public IAES
|
|
|
|
{
|
|
|
|
__m128i m_ekey[11];
|
|
|
|
__m128i m_dkey[11];
|
|
|
|
public:
|
|
|
|
void encrypt(const uint8_t* iv, const uint8_t* inbuf, uint8_t* outbuf, uint64_t len)
|
|
|
|
{
|
|
|
|
__m128i feedback,data;
|
|
|
|
uint64_t i,j;
|
|
|
|
if (len%16)
|
|
|
|
len = len/16+1;
|
|
|
|
else
|
|
|
|
len /= 16;
|
|
|
|
feedback = _mm_loadu_si128((__m128i*)iv);
|
|
|
|
for (i=0 ; i<len ; i++)
|
|
|
|
{
|
|
|
|
data = _mm_loadu_si128(&((__m128i*)inbuf)[i]);
|
|
|
|
feedback = _mm_xor_si128(data, feedback);
|
|
|
|
feedback = _mm_xor_si128(feedback, m_ekey[0]);
|
|
|
|
for (j=1 ; j<10 ; j++)
|
|
|
|
feedback = _mm_aesenc_si128(feedback, m_ekey[j]);
|
|
|
|
feedback = _mm_aesenclast_si128(feedback, m_ekey[j]);
|
|
|
|
_mm_storeu_si128(&((__m128i*)outbuf)[i], feedback);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
void decrypt(const uint8_t* iv, const uint8_t* inbuf, uint8_t* outbuf, uint64_t len)
|
|
|
|
{
|
|
|
|
__m128i data,feedback,last_in;
|
|
|
|
uint64_t i,j;
|
|
|
|
if (len%16)
|
|
|
|
len = len/16+1;
|
|
|
|
else
|
|
|
|
len /= 16;
|
|
|
|
feedback = _mm_loadu_si128((__m128i*)iv);
|
|
|
|
for (i=0 ; i<len ; i++)
|
|
|
|
{
|
|
|
|
last_in=_mm_loadu_si128(&((__m128i*)inbuf)[i]);
|
|
|
|
data = _mm_xor_si128(last_in, m_dkey[0]);
|
|
|
|
for (j=1 ; j<10 ; j++)
|
|
|
|
data = _mm_aesdec_si128(data, m_dkey[j]);
|
|
|
|
data = _mm_aesdeclast_si128(data, m_dkey[j]);
|
|
|
|
data = _mm_xor_si128(data, feedback);
|
|
|
|
_mm_storeu_si128(&((__m128i*)outbuf)[i], data);
|
|
|
|
feedback = last_in;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static inline __m128i AES_128_ASSIST (__m128i temp1, __m128i temp2)
|
|
|
|
{
|
|
|
|
__m128i temp3;
|
|
|
|
temp2 = _mm_shuffle_epi32 (temp2 ,0xff);
|
|
|
|
temp3 = _mm_slli_si128 (temp1, 0x4);
|
|
|
|
temp1 = _mm_xor_si128 (temp1, temp3);
|
|
|
|
temp3 = _mm_slli_si128 (temp3, 0x4);
|
|
|
|
temp1 = _mm_xor_si128 (temp1, temp3);
|
|
|
|
temp3 = _mm_slli_si128 (temp3, 0x4);
|
|
|
|
temp1 = _mm_xor_si128 (temp1, temp3);
|
|
|
|
temp1 = _mm_xor_si128 (temp1, temp2);
|
|
|
|
return temp1;
|
|
|
|
}
|
|
|
|
|
|
|
|
void setKey(const uint8_t* key)
|
|
|
|
{
|
|
|
|
__m128i temp1, temp2;
|
|
|
|
|
|
|
|
temp1 = _mm_loadu_si128((__m128i*)key);
|
|
|
|
m_ekey[0] = temp1;
|
|
|
|
m_dkey[10] = temp1;
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x1);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[1] = temp1;
|
|
|
|
m_dkey[9] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x2);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[2] = temp1;
|
|
|
|
m_dkey[8] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x4);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[3] = temp1;
|
|
|
|
m_dkey[7] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x8);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[4] = temp1;
|
|
|
|
m_dkey[6] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x10);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[5] = temp1;
|
|
|
|
m_dkey[5] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x20);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[6] = temp1;
|
|
|
|
m_dkey[4] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x40);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[7] = temp1;
|
|
|
|
m_dkey[3] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x80);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[8] = temp1;
|
|
|
|
m_dkey[2] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x1b);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[9] = temp1;
|
|
|
|
m_dkey[1] = _mm_aesimc_si128(temp1);
|
|
|
|
temp2 = _mm_aeskeygenassist_si128(temp1, 0x36);
|
|
|
|
temp1 = AES_128_ASSIST(temp1, temp2);
|
|
|
|
m_ekey[10] = temp1;
|
|
|
|
m_dkey[0] = temp1;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
static int HAS_AES_NI = -1;
|
|
|
|
std::unique_ptr<IAES> NewAES()
|
|
|
|
{
|
|
|
|
#if __AES__
|
|
|
|
if (HAS_AES_NI == -1)
|
|
|
|
{
|
|
|
|
unsigned int a,b,c,d;
|
|
|
|
__cpuid(1, a,b,c,d);
|
|
|
|
HAS_AES_NI = ((c & 0x2000000) != 0);
|
|
|
|
}
|
|
|
|
if (HAS_AES_NI)
|
|
|
|
return std::unique_ptr<IAES>(new NiAES);
|
|
|
|
else
|
|
|
|
return std::unique_ptr<IAES>(new SoftwareAES);
|
|
|
|
#else
|
|
|
|
return std::unique_ptr<IAES>(new SoftwareAES);
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|