From: <lab...@us...> - 2018-12-09 16:11:38
|
Revision: 1551 http://sourceforge.net/p/opengtoolkit/svn/1551 Author: labviewer Date: 2018-12-09 16:11:37 +0000 (Sun, 09 Dec 2018) Log Message: ----------- Disable Intel Intrinsic AES support since VS2005 doesn't support it Modified Paths: -------------- trunk/lvzip/c_source/aes/aescrypt.c trunk/lvzip/c_source/aes/aeskey.c trunk/lvzip/c_source/aes/aesopt.h Added Paths: ----------- trunk/lvzip/c_source/aes/aes_ni.c trunk/lvzip/c_source/aes/aes_ni.h Added: trunk/lvzip/c_source/aes/aes_ni.c =================================================================== --- trunk/lvzip/c_source/aes/aes_ni.c (rev 0) +++ trunk/lvzip/c_source/aes/aes_ni.c 2018-12-09 16:11:37 UTC (rev 1551) @@ -0,0 +1,687 @@ +/* +Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved. + +The redistribution and use of this software (with or without changes) +is allowed without the payment of fees or royalties provided that: + + source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation. + +This software is provided 'as is' with no explicit or implied warranties +in respect of its operation, including, but not limited to, correctness +and fitness for purpose. 
+--------------------------------------------------------------------------- +Issue Date: 09/09/2014 +*/ + +#include "aes_ni.h" + +#if defined( USE_INTEL_AES_IF_PRESENT ) + +#if defined(_MSC_VER) + +#include <intrin.h> +#pragma intrinsic(__cpuid) +#define INLINE __inline + /* MSVC build: runs __cpuid leaf 1 once, caches cpu_info[2] & 0x02000000 in a function-local static, and returns that masked value (nonzero when the bit is set, not normalised to 1). */ +INLINE int has_aes_ni(void) +{ + static int test = -1; + if(test < 0) + { + int cpu_info[4]; + __cpuid(cpu_info, 1); + test = cpu_info[2] & 0x02000000; + } + return test; +} + +#elif defined( __GNUC__ ) + +#include <cpuid.h> + +#if !defined(__clang__) +#pragma GCC target ("ssse3") +#pragma GCC target ("sse4.1") +#pragma GCC target ("aes") +#endif + +#include <x86intrin.h> +#define INLINE static __inline + /* GNU C build: same check through __get_cpuid(1, ...), testing the 0x2000000 mask on the third output register c; a failed __get_cpuid call is cached as 0. */ +INLINE int has_aes_ni() +{ + static int test = -1; + if(test < 0) + { + unsigned int a, b, c, d; + if(!__get_cpuid(1, &a, &b, &c, &d)) + test = 0; + else + test = (c & 0x2000000); + } + return test; +} + +#else +#error AES New Instructions require Microsoft, Intel, GNU C, or CLANG +#endif + /* One AES-128 key-expansion step: broadcasts dword 3 of t2 (the _mm_aeskeygenassist_si128 result passed in by the caller), xors t1 with copies of itself shifted left by 4, 8 and 12 bytes, xors in the broadcast word and returns the result. */ +INLINE __m128i aes_128_assist(__m128i t1, __m128i t2) +{ + __m128i t3; + t2 = _mm_shuffle_epi32(t2, 0xff); + t3 = _mm_slli_si128(t1, 0x4); + t1 = _mm_xor_si128(t1, t3); + t3 = _mm_slli_si128(t3, 0x4); + t1 = _mm_xor_si128(t1, t3); + t3 = _mm_slli_si128(t3, 0x4); + t1 = _mm_xor_si128(t1, t3); + t1 = _mm_xor_si128(t1, t2); + return t1; +} + /* Builds the AES-128 encryption schedule: ks[0] is the 16-byte key loaded unaligned, ks[1] to ks[10] come from successive aes_128_assist steps with immediates 0x1 through 0x36. When has_aes_ni() returns 0 the call is forwarded to aes_xi(encrypt_key128). On the intrinsic path cx->inf.l is cleared, cx->inf.b[0] is set to 10 * 16 and EXIT_SUCCESS is returned. NOTE(review): ks is written through an __m128i pointer, so cx->ks presumably needs 16-byte alignment - confirm against the aes_encrypt_ctx declaration. */ +AES_RETURN aes_ni(encrypt_key128)(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ + __m128i t1, t2; + __m128i *ks = (__m128i*)cx->ks; + + if(!has_aes_ni()) + { + return aes_xi(encrypt_key128)(key, cx); + } + + t1 = _mm_loadu_si128((__m128i*)key); + + ks[0] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x1); + t1 = aes_128_assist(t1, t2); + ks[1] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x2); + t1 = aes_128_assist(t1, t2); + ks[2] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x4); + t1 = aes_128_assist(t1, t2); + ks[3] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x8); + t1 = aes_128_assist(t1, t2); + ks[4] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x10); + t1 = 
aes_128_assist(t1, t2); + ks[5] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x20); + t1 = aes_128_assist(t1, t2); + ks[6] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x40); + t1 = aes_128_assist(t1, t2); + ks[7] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x80); + t1 = aes_128_assist(t1, t2); + ks[8] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x1b); + t1 = aes_128_assist(t1, t2); + ks[9] = t1; + + t2 = _mm_aeskeygenassist_si128(t1, 0x36); + t1 = aes_128_assist(t1, t2); + ks[10] = t1; + + cx->inf.l = 0; + cx->inf.b[0] = 10 * 16; + return EXIT_SUCCESS; +} + /* One AES-192 key-expansion step, updating all three arguments in place: *t2 becomes the broadcast of its dword 1, *t1 is xored with its 4, 8 and 12 byte left shifts and with that word; then *t2 is replaced by the broadcast of dword 3 of the new *t1 and *t3 is xored with its 4 byte left shift and with that word. */ +INLINE void aes_192_assist(__m128i* t1, __m128i * t2, __m128i * t3) +{ + __m128i t4; + *t2 = _mm_shuffle_epi32(*t2, 0x55); + t4 = _mm_slli_si128(*t1, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + *t1 = _mm_xor_si128(*t1, *t2); + *t2 = _mm_shuffle_epi32(*t1, 0xff); + t4 = _mm_slli_si128(*t3, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + *t3 = _mm_xor_si128(*t3, *t2); +} + /* Builds the AES-192 encryption schedule ks[0] to ks[12]. After every aes_192_assist step either t1 and t3 are stored directly, or _mm_shuffle_pd splices 64-bit halves of the previous ks entry, t1 and t3 into consecutive ks entries. Forwards to aes_xi(encrypt_key192) when has_aes_ni() returns 0; otherwise clears cx->inf.l, sets cx->inf.b[0] to 12 * 16 and returns EXIT_SUCCESS. NOTE(review): the second _mm_loadu_si128 reads 16 bytes at key + 16, so 32 bytes are read from key in total - confirm callers always supply a buffer that large. */ +AES_RETURN aes_ni(encrypt_key192)(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ + __m128i t1, t2, t3; + __m128i *ks = (__m128i*)cx->ks; + + if(!has_aes_ni()) + { + return aes_xi(encrypt_key192)(key, cx); + } + + t1 = _mm_loadu_si128((__m128i*)key); + t3 = _mm_loadu_si128((__m128i*)(key + 16)); + + ks[0] = t1; + ks[1] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x1); + aes_192_assist(&t1, &t2, &t3); + + ks[1] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[1]), _mm_castsi128_pd(t1), 0)); + ks[2] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1)); + + t2 = _mm_aeskeygenassist_si128(t3, 0x2); + aes_192_assist(&t1, &t2, &t3); + ks[3] = t1; + ks[4] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x4); + aes_192_assist(&t1, &t2, &t3); + ks[4] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[4]), _mm_castsi128_pd(t1), 0)); + ks[5] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), 
_mm_castsi128_pd(t3), 1)); + + t2 = _mm_aeskeygenassist_si128(t3, 0x8); + aes_192_assist(&t1, &t2, &t3); + ks[6] = t1; + ks[7] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x10); + aes_192_assist(&t1, &t2, &t3); + ks[7] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[7]), _mm_castsi128_pd(t1), 0)); + ks[8] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1)); + + t2 = _mm_aeskeygenassist_si128(t3, 0x20); + aes_192_assist(&t1, &t2, &t3); + ks[9] = t1; + ks[10] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x40); + aes_192_assist(&t1, &t2, &t3); + ks[10] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(ks[10]), _mm_castsi128_pd(t1), 0)); + ks[11] = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(t1), _mm_castsi128_pd(t3), 1)); + + t2 = _mm_aeskeygenassist_si128(t3, 0x80); + aes_192_assist(&t1, &t2, &t3); + ks[12] = t1; + + cx->inf.l = 0; + cx->inf.b[0] = 12 * 16; + return EXIT_SUCCESS; +} + /* AES-256 expansion, first half-step: replaces *t2 with the broadcast of its dword 3, then xors *t1 with its 4, 8 and 12 byte left shifts and with that word. */ +INLINE void aes_256_assist1(__m128i* t1, __m128i * t2) +{ + __m128i t4; + *t2 = _mm_shuffle_epi32(*t2, 0xff); + t4 = _mm_slli_si128(*t1, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t1 = _mm_xor_si128(*t1, t4); + *t1 = _mm_xor_si128(*t1, *t2); +} + /* AES-256 expansion, second half-step: runs _mm_aeskeygenassist_si128 on *t1 with immediate 0, broadcasts dword 2 of the result, then xors *t3 with its 4, 8 and 12 byte left shifts and with that word. *t1 is only read. */ +INLINE void aes_256_assist2(__m128i* t1, __m128i * t3) +{ + __m128i t2, t4; + t4 = _mm_aeskeygenassist_si128(*t1, 0x0); + t2 = _mm_shuffle_epi32(t4, 0xaa); + t4 = _mm_slli_si128(*t3, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + t4 = _mm_slli_si128(t4, 0x4); + *t3 = _mm_xor_si128(*t3, t4); + *t3 = _mm_xor_si128(*t3, t2); +} + /* Builds the AES-256 encryption schedule ks[0] to ks[14]: ks[0] and ks[1] are the two 16-byte halves of the key, later even entries come from aes_256_assist1 and later odd entries from aes_256_assist2. Forwards to aes_xi(encrypt_key256) when has_aes_ni() returns 0; otherwise clears cx->inf.l, sets cx->inf.b[0] to 14 * 16 and returns EXIT_SUCCESS. */ +AES_RETURN aes_ni(encrypt_key256)(const unsigned char *key, aes_encrypt_ctx cx[1]) +{ + __m128i t1, t2, t3; + __m128i *ks = (__m128i*)cx->ks; + + if(!has_aes_ni()) + { + return aes_xi(encrypt_key256)(key, cx); + } + + t1 = _mm_loadu_si128((__m128i*)key); + t3 = _mm_loadu_si128((__m128i*)(key + 16)); + + ks[0] = t1; + ks[1] = t3; 
+ + t2 = _mm_aeskeygenassist_si128(t3, 0x01); + aes_256_assist1(&t1, &t2); + ks[2] = t1; + aes_256_assist2(&t1, &t3); + ks[3] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x02); + aes_256_assist1(&t1, &t2); + ks[4] = t1; + aes_256_assist2(&t1, &t3); + ks[5] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x04); + aes_256_assist1(&t1, &t2); + ks[6] = t1; + aes_256_assist2(&t1, &t3); + ks[7] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x08); + aes_256_assist1(&t1, &t2); + ks[8] = t1; + aes_256_assist2(&t1, &t3); + ks[9] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x10); + aes_256_assist1(&t1, &t2); + ks[10] = t1; + aes_256_assist2(&t1, &t3); + ks[11] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x20); + aes_256_assist1(&t1, &t2); + ks[12] = t1; + aes_256_assist2(&t1, &t3); + ks[13] = t3; + + t2 = _mm_aeskeygenassist_si128(t3, 0x40); + aes_256_assist1(&t1, &t2); + ks[14] = t1; + + cx->inf.l = 0; + cx->inf.b[0] = 14 * 16; + return EXIT_SUCCESS; +} + /* Converts the encryption schedule held in cx->ks for decryption, in place: applies _mm_aesimc_si128 to ks[1] through ks[rounds - 1], where rounds is cx->inf.b[0] >> 4; ks[0] and ks[rounds] are left untouched. */ +INLINE void enc_to_dec(aes_decrypt_ctx cx[1]) +{ + __m128i *ks = (__m128i*)cx->ks; + int j; + + for( j = 1 ; j < (cx->inf.b[0] >> 4) ; ++j ) + ks[j] = _mm_aesimc_si128(ks[j]); +} + /* The three decrypt_key routines that follow share one shape: forward to the aes_xi counterpart when has_aes_ni() returns 0; otherwise build the encryption schedule in the same context (cast to aes_encrypt_ctx*), run enc_to_dec on it and return EXIT_SUCCESS, or return EXIT_FAILURE if the encryption key setup did not report EXIT_SUCCESS. */ +AES_RETURN aes_ni(decrypt_key128)(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ + if(!has_aes_ni()) + { + return aes_xi(decrypt_key128)(key, cx); + } + + if(aes_ni(encrypt_key128)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS) + { + enc_to_dec(cx); + return EXIT_SUCCESS; + } + else + return EXIT_FAILURE; + +} + +AES_RETURN aes_ni(decrypt_key192)(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ + if(!has_aes_ni()) + { + return aes_xi(decrypt_key192)(key, cx); + } + + if(aes_ni(encrypt_key192)(key, (aes_encrypt_ctx*)cx) == EXIT_SUCCESS) + { + enc_to_dec(cx); + return EXIT_SUCCESS; + } + else + return EXIT_FAILURE; +} + +AES_RETURN aes_ni(decrypt_key256)(const unsigned char *key, aes_decrypt_ctx cx[1]) +{ + if(!has_aes_ni()) + { + return aes_xi(decrypt_key256)(key, cx); + } + + if(aes_ni(encrypt_key256)(key, (aes_encrypt_ctx*)cx) == 
EXIT_SUCCESS) + { + enc_to_dec(cx); + return EXIT_SUCCESS; + } + else + return EXIT_FAILURE; +} + /* Encrypts one 16-byte block from in to out (both accessed with unaligned load and store). Returns EXIT_FAILURE unless cx->inf.b[0] is 10 * 16, 12 * 16 or 14 * 16, and forwards to aes_xi(encrypt) when has_aes_ni() returns 0. The switch has no break statements: the 14 * 16 and 12 * 16 cases each run two extra _mm_aesenc_si128 rounds and then continue into the 10 * 16 case, which ends with _mm_aesenclast_si128. */ +AES_RETURN aes_ni(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]) +{ + __m128i *key = (__m128i*)cx->ks, t; + + if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16) + return EXIT_FAILURE; + + if(!has_aes_ni()) + { + return aes_xi(encrypt)(in, out, cx); + } + + t = _mm_xor_si128(_mm_loadu_si128((__m128i*)in), *(__m128i*)key); + + switch(cx->inf.b[0]) + { + case 14 * 16: + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + case 12 * 16: + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + case 10 * 16: + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenc_si128(t, *(__m128i*)++key); + t = _mm_aesenclast_si128(t, *(__m128i*)++key); + } + + _mm_storeu_si128(&((__m128i*)out)[0], t); + return EXIT_SUCCESS; +} + /* Decrypts one 16-byte block from in to out. Validation and fallback mirror the encrypt routine, but key starts at the last schedule entry, ks + (cx->inf.b[0] >> 4), and is walked downwards with _mm_aesdec_si128, finishing with _mm_aesdeclast_si128; the switch cases again run into one another without break. */ +AES_RETURN aes_ni(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) +{ + __m128i *key = (__m128i*)cx->ks + (cx->inf.b[0] >> 4), t; + + if(cx->inf.b[0] != 10 * 16 && cx->inf.b[0] != 12 * 16 && cx->inf.b[0] != 14 * 16) + return EXIT_FAILURE; + + if(!has_aes_ni()) + { + return aes_xi(decrypt)(in, out, cx); + } + + t = _mm_xor_si128(_mm_loadu_si128((__m128i*)in), *(__m128i*)key); + + switch(cx->inf.b[0]) + { + case 14 * 16: + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + case 12 * 16: + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + case 10 * 16: + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = 
_mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdec_si128(t, *(__m128i*)--key); + t = _mm_aesdeclast_si128(t, *(__m128i*)--key); + } + + _mm_storeu_si128((__m128i*)out, t); + return EXIT_SUCCESS; +} + +#ifdef ADD_AESNI_MODE_CALLS +#ifdef USE_AES_CONTEXT + /* CBC encryption using the schedule in cx. Returns EXIT_FAILURE unless cx->inf.b[0] >> 4 is 10, 12 or 14, and forwards to aes_cbc_encrypt when has_aes_ni() returns 0. length is given in bytes and converted to a count of 16-byte blocks, rounding up. NOTE(review): with a length that is not a multiple of 16 the last iteration still loads and stores a full 16-byte block, and the final feedback value is never stored back to ivec on this path - confirm both are intended. */ +AES_RETURN aes_CBC_encrypt(const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const aes_encrypt_ctx cx[1]) +{ + __m128i feedback, data, *key = (__m128i*)cx->ks; + int number_of_rounds = cx->inf.b[0] >> 4, j; + unsigned long i; + + if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14) + return EXIT_FAILURE; + + if(!has_aes_ni()) + { + return aes_cbc_encrypt(in, out, length, ivec, cx); + } + + if(length % 16) + length = length / 16 + 1; + else length /= 16; + feedback = _mm_loadu_si128((__m128i*)ivec); + for(i = 0; i < length; i++) + { + data = _mm_loadu_si128(&((__m128i*)in)[i]); + feedback = _mm_xor_si128(data, feedback); + feedback = _mm_xor_si128(feedback, ((__m128i*)key)[0]); + for(j = 1; j <number_of_rounds; j++) + feedback = _mm_aesenc_si128(feedback, ((__m128i*)key)[j]); + feedback = _mm_aesenclast_si128(feedback, ((__m128i*)key)[j]); + _mm_storeu_si128(&((__m128i*)out)[i], feedback); + } + return EXIT_SUCCESS; +} + /* CBC decryption using the schedule in cx: each block is xored with key[number_of_rounds], run through _mm_aesdec_si128 with key[number_of_rounds - 1] down to key[1], finished with _mm_aesdeclast_si128 and key[0], then xored with the previous ciphertext block (ivec for the first block). Validation, fallback (to aes_cbc_decrypt) and the rounding of length mirror the encrypt routine. */ +AES_RETURN aes_CBC_decrypt(const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + const aes_decrypt_ctx cx[1]) +{ + __m128i data, feedback, last_in, *key = (__m128i*)cx->ks; + int number_of_rounds = cx->inf.b[0] >> 4, j; + unsigned long i; + + if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14) + return EXIT_FAILURE; + + if(!has_aes_ni()) + { + return aes_cbc_decrypt(in, out, length, 
ivec, cx); + } + + if(length % 16) + length = length / 16 + 1; + else length /= 16; + feedback = _mm_loadu_si128((__m128i*)ivec); + for(i = 0; i < length; i++) + { + last_in = _mm_loadu_si128(&((__m128i*)in)[i]); + data = _mm_xor_si128(last_in, ((__m128i*)key)[number_of_rounds]); + for(j = number_of_rounds - 1; j > 0; j--) + { + data = _mm_aesdec_si128(data, ((__m128i*)key)[j]); + } + data = _mm_aesdeclast_si128(data, ((__m128i*)key)[0]); + data = _mm_xor_si128(data, feedback); + _mm_storeu_si128(&((__m128i*)out)[i], data); + feedback = last_in; + } + return EXIT_SUCCESS; +} + /* Counter callback handed to aes_ctr_crypt by the fallback path of AES_CTR_encrypt: adds 1 to the 32-bit word at byte offset 8 of ctr_blk (accessed through a uint32_t cast, so in host byte order) and, when that word wraps to 0, adds 1 to the 32-bit word at offset 12. */ +static void ctr_inc(unsigned char *ctr_blk) +{ + uint32_t c; + + c = *(uint32_t*)(ctr_blk + 8); + c++; + *(uint32_t*)(ctr_blk + 8) = c; + + if(!c) + *(uint32_t*)(ctr_blk + 12) = *(uint32_t*)(ctr_blk + 12) + 1; +} + /* CTR-mode routine using the schedule in cx. Returns EXIT_FAILURE unless cx->inf.b[0] >> 4 is 10, 12 or 14. When has_aes_ni() returns 0 it copies the 8 ivec bytes to ctr_blk[0..7] and the 4 nonce bytes to ctr_blk[8..11] and calls aes_ctr_crypt with ctr_inc. NOTE(review): ctr_blk[12..15] are not assigned before that call, and *(long*)nonce further down reads sizeof(long) bytes from a 4-byte array - confirm both. On the intrinsic path length is rounded up to whole 16-byte blocks and each block of in is xored with the encryption of a byte-swapped copy of ctr_block, which is advanced by ONE with _mm_add_epi64 for every block. */ +AES_RETURN AES_CTR_encrypt(const unsigned char *in, + unsigned char *out, + const unsigned char ivec[8], + const unsigned char nonce[4], + unsigned long length, + const aes_encrypt_ctx cx[1]) +{ + __m128i ctr_block = { 0 }, *key = (__m128i*)cx->ks, tmp, ONE, BSWAP_EPI64; + int number_of_rounds = cx->inf.b[0] >> 4, j; + unsigned long i; + + if(number_of_rounds != 10 && number_of_rounds != 12 && number_of_rounds != 14) + return EXIT_FAILURE; + + if(!has_aes_ni()) + { + unsigned char ctr_blk[16]; + *(uint64_t*)ctr_blk = *(uint64_t*)ivec; + *(uint32_t*)(ctr_blk + 8) = *(uint32_t*)nonce; + return aes_ctr_crypt(in, out, length, (unsigned char*)ctr_blk, ctr_inc, cx); + } + + if(length % 16) + length = length / 16 + 1; + else length /= 16; + ONE = _mm_set_epi32(0, 1, 0, 0); + BSWAP_EPI64 = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); +#ifdef _MSC_VER + ctr_block = _mm_insert_epi64(ctr_block, *(long long*)ivec, 1); +#else + ctr_block = _mm_set_epi64(*(__m64*)ivec, *(__m64*)&ctr_block); +#endif + ctr_block = _mm_insert_epi32(ctr_block, *(long*)nonce, 1); + ctr_block = _mm_srli_si128(ctr_block, 4); + ctr_block = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64); + ctr_block = 
_mm_add_epi64(ctr_block, ONE); + for(i = 0; i < length; i++) + { + tmp = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64); + ctr_block = _mm_add_epi64(ctr_block, ONE); + tmp = _mm_xor_si128(tmp, ((__m128i*)key)[0]); + for(j = 1; j <number_of_rounds; j++) + { + tmp = _mm_aesenc_si128(tmp, ((__m128i*)key)[j]); + }; + tmp = _mm_aesenclast_si128(tmp, ((__m128i*)key)[j]); + tmp = _mm_xor_si128(tmp, _mm_loadu_si128(&((__m128i*)in)[i])); + _mm_storeu_si128(&((__m128i*)out)[i], tmp); + } + return EXIT_SUCCESS; +} + +#else + /* Variants compiled when USE_AES_CONTEXT is not defined: they take a raw key schedule and number_of_rounds instead of a context, return void, and perform no has_aes_ni() check or round-count validation. This one is CBC encryption; length is in bytes and is rounded up to whole 16-byte blocks, and ivec is only read. */ +void aes_CBC_encrypt(const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + unsigned char *key, + int number_of_rounds) +{ + __m128i feedback, data; + unsigned long i; + int j; + if(length % 16) + length = length / 16 + 1; + else length /= 16; + feedback = _mm_loadu_si128((__m128i*)ivec); + for(i = 0; i < length; i++) + { + data = _mm_loadu_si128(&((__m128i*)in)[i]); + feedback = _mm_xor_si128(data, feedback); + feedback = _mm_xor_si128(feedback, ((__m128i*)key)[0]); + for(j = 1; j <number_of_rounds; j++) + feedback = _mm_aesenc_si128(feedback, ((__m128i*)key)[j]); + feedback = _mm_aesenclast_si128(feedback, ((__m128i*)key)[j]); + _mm_storeu_si128(&((__m128i*)out)[i], feedback); + } +} + /* CBC decryption over a raw schedule: key[0] is xored in first, key[1] to key[number_of_rounds - 1] feed _mm_aesdec_si128 and key[number_of_rounds] feeds _mm_aesdeclast_si128, so the caller presumably passes the decryption schedule already in order of use - confirm. */ +void aes_CBC_decrypt(const unsigned char *in, + unsigned char *out, + unsigned char ivec[16], + unsigned long length, + unsigned char *key, + int number_of_rounds) +{ + __m128i data, feedback, last_in; + unsigned long i; + int j; + if(length % 16) + length = length / 16 + 1; + else length /= 16; + feedback = _mm_loadu_si128((__m128i*)ivec); + for(i = 0; i < length; i++) + { + last_in = _mm_loadu_si128(&((__m128i*)in)[i]); + data = _mm_xor_si128(last_in, ((__m128i*)key)[0]); + for(j = 1; j <number_of_rounds; j++) + { + data = _mm_aesdec_si128(data, ((__m128i*)key)[j]); + } + data = _mm_aesdeclast_si128(data, ((__m128i*)key)[j]); + data = _mm_xor_si128(data, feedback); + _mm_storeu_si128(&((__m128i*)out)[i], data); + feedback = 
last_in; + } +} + /* CTR-mode routine over a raw key schedule and explicit number_of_rounds; it returns void and performs no has_aes_ni() check or round-count validation. length is in bytes and is rounded up to whole 16-byte blocks; each block of in is xored with the encryption of a byte-swapped copy of ctr_block, which is advanced by ONE with _mm_add_epi64 for every block. NOTE(review): *(long*)nonce reads sizeof(long) bytes from a 4-byte array - confirm. */ +void AES_CTR_encrypt(const unsigned char *in, + unsigned char *out, + const unsigned char ivec[8], + const unsigned char nonce[4], + unsigned long length, + const unsigned char *key, + int number_of_rounds) +{ + __m128i ctr_block = { 0 }, tmp, ONE, BSWAP_EPI64; + unsigned long i; + int j; + if(length % 16) + length = length / 16 + 1; + else length /= 16; + ONE = _mm_set_epi32(0, 1, 0, 0); + BSWAP_EPI64 = _mm_setr_epi8(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); +#ifdef _MSC_VER + ctr_block = _mm_insert_epi64(ctr_block, *(long long*)ivec, 1); +#else + ctr_block = _mm_set_epi64(*(__m64*)ivec, *(__m64*)&ctr_block); +#endif + ctr_block = _mm_insert_epi32(ctr_block, *(long*)nonce, 1); + ctr_block = _mm_srli_si128(ctr_block, 4); + ctr_block = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64); + ctr_block = _mm_add_epi64(ctr_block, ONE); + for(i = 0; i < length; i++) + { + tmp = _mm_shuffle_epi8(ctr_block, BSWAP_EPI64); + ctr_block = _mm_add_epi64(ctr_block, ONE); + tmp = _mm_xor_si128(tmp, ((__m128i*)key)[0]); + for(j = 1; j <number_of_rounds; j++) + { + tmp = _mm_aesenc_si128(tmp, ((__m128i*)key)[j]); + }; + tmp = _mm_aesenclast_si128(tmp, ((__m128i*)key)[j]); + tmp = _mm_xor_si128(tmp, _mm_loadu_si128(&((__m128i*)in)[i])); + _mm_storeu_si128(&((__m128i*)out)[i], tmp); + } +} +#endif +#endif + +#endif Added: trunk/lvzip/c_source/aes/aes_ni.h =================================================================== --- trunk/lvzip/c_source/aes/aes_ni.h (rev 0) +++ trunk/lvzip/c_source/aes/aes_ni.h 2018-12-09 16:11:37 UTC (rev 1551) @@ -0,0 +1,59 @@ +/* +Copyright (c) 1998-2013, Brian Gladman, Worcester, UK. All rights reserved. 
+ +The redistribution and use of this software (with or without changes) +is allowed without the payment of fees or royalties provided that: + + source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation. + +This software is provided 'as is' with no explicit or implied warranties +in respect of its operation, including, but not limited to, correctness +and fitness for purpose. +--------------------------------------------------------------------------- +Issue Date: 13/11/2013 +*/ + +#ifndef AES_NI_H +#define AES_NI_H + +#define USE_AES_CONTEXT + +#include "aesopt.h" + +#if defined( USE_INTEL_AES_IF_PRESENT ) + +/* map names in C code to make them internal ('name' -> 'aes_name_i') */ +#define aes_xi(x) aes_ ## x ## _i + +/* map names here to provide the external API ('name' -> 'aes_name') */ +#define aes_ni(x) aes_ ## x + +AES_RETURN aes_ni(encrypt_key128)(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN aes_ni(encrypt_key192)(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN aes_ni(encrypt_key256)(const unsigned char *key, aes_encrypt_ctx cx[1]); + +AES_RETURN aes_ni(decrypt_key128)(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN aes_ni(decrypt_key192)(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN aes_ni(decrypt_key256)(const unsigned char *key, aes_decrypt_ctx cx[1]); + +AES_RETURN aes_ni(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]); +AES_RETURN aes_ni(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]); + +AES_RETURN aes_xi(encrypt_key128)(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN aes_xi(encrypt_key192)(const unsigned char *key, aes_encrypt_ctx cx[1]); +AES_RETURN aes_xi(encrypt_key256)(const unsigned char *key, aes_encrypt_ctx 
cx[1]); + +AES_RETURN aes_xi(decrypt_key128)(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN aes_xi(decrypt_key192)(const unsigned char *key, aes_decrypt_ctx cx[1]); +AES_RETURN aes_xi(decrypt_key256)(const unsigned char *key, aes_decrypt_ctx cx[1]); + +AES_RETURN aes_xi(encrypt)(const unsigned char *in, unsigned char *out, const aes_encrypt_ctx cx[1]); +AES_RETURN aes_xi(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]); + +#endif + +#endif Modified: trunk/lvzip/c_source/aes/aescrypt.c =================================================================== --- trunk/lvzip/c_source/aes/aescrypt.c 2018-12-09 14:12:54 UTC (rev 1550) +++ trunk/lvzip/c_source/aes/aescrypt.c 2018-12-09 16:11:37 UTC (rev 1551) @@ -227,8 +227,9 @@ #endif AES_RETURN aes_xi(decrypt)(const unsigned char *in, unsigned char *out, const aes_decrypt_ctx cx[1]) -{ uint32_t locals(b0, b1); -#if defined( dec_imvars ) +{ + uint32_t locals(b0, b1); +#if defined (dec_imvars) dec_imvars; /* declare variables for inv_mcol() if needed */ #endif const uint32_t *kp; Modified: trunk/lvzip/c_source/aes/aeskey.c =================================================================== --- trunk/lvzip/c_source/aes/aeskey.c 2018-12-09 14:12:54 UTC (rev 1550) +++ trunk/lvzip/c_source/aes/aeskey.c 2018-12-09 16:11:37 UTC (rev 1551) @@ -67,9 +67,7 @@ # undef DEC_KS_UNROLL # endif #endif - #if (FUNCS_IN_C & ENC_KEYING_IN_C) - #if defined(AES_128) || defined( AES_VAR ) #define ke4(k,i) \ Modified: trunk/lvzip/c_source/aes/aesopt.h =================================================================== --- trunk/lvzip/c_source/aes/aesopt.h 2018-12-09 14:12:54 UTC (rev 1550) +++ trunk/lvzip/c_source/aes/aesopt.h 2018-12-09 16:11:37 UTC (rev 1551) @@ -188,7 +188,7 @@ built */ -#if 1 && defined( INTEL_AES_POSSIBLE ) && !defined( USE_INTEL_AES_IF_PRESENT ) +#if 0 && defined( INTEL_AES_POSSIBLE ) && !defined( USE_INTEL_AES_IF_PRESENT ) # define USE_INTEL_AES_IF_PRESENT #endif This was sent 
by the SourceForge.net collaborative development platform, the world's largest Open Source development site. |