From 7b6aa41ca8dc3ca2a0c9d01ff9870794ad98e403 Mon Sep 17 00:00:00 2001 From: R4SAS Date: Thu, 31 Aug 2023 16:52:51 +0000 Subject: [PATCH] CPU: remove AVX code, switch to __builtin for AES detection (#1959) * [cpu] remove avx detect and code blocks, try to switch to __builtin * [cpu] use __builtin_* only on x86 systems * [cpu] perform check in separate function * [cpu] set AES definition on MSVC * update x86 and aes support checks at compile time * [cmake] update comment about AES on MSVC --- .editorconfig | 3 + build/CMakeLists.txt | 11 +- contrib/i2pd.conf | 2 - daemon/Daemon.cpp | 3 +- libi2pd/CPU.cpp | 54 +++---- libi2pd/CPU.h | 5 +- libi2pd/Config.cpp | 4 +- libi2pd/Crypto.cpp | 332 ++++++++++++++++++++++--------------------- libi2pd/Crypto.h | 2 +- libi2pd/Identity.cpp | 29 +--- libi2pd/api.cpp | 3 +- 11 files changed, 219 insertions(+), 229 deletions(-) diff --git a/.editorconfig b/.editorconfig index e1f1243a..0f57a84a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -34,3 +34,6 @@ trim_trailing_whitespace = false [*.yml] indent_style = space indent_size = 2 + +[*.patch] +trim_trailing_whitespace = false diff --git a/build/CMakeLists.txt b/build/CMakeLists.txt index f5d01a0b..3185ffab 100644 --- a/build/CMakeLists.txt +++ b/build/CMakeLists.txt @@ -197,14 +197,11 @@ endif() # Note: AES-NI and AVX is available on x86-based CPU's. # Here also ARM64 implementation, but currently we don't support it. -# MSVC is not supported. -if(MSVC) - message(STATUS "AES-NI is not supported on MSVC, option was disabled") - set(WITH_AESNI OFF) -endif() - +# MSVC is not supported due to different ASM processing, so we hope OpenSSL has its own checks to run optimized code. if(WITH_AESNI AND (ARCHITECTURE MATCHES "x86_64" OR ARCHITECTURE MATCHES "i386")) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + if(NOT MSVC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") + endif() add_definitions(-D__AES__) endif() diff --git a/contrib/i2pd.conf b/contrib/i2pd.conf index 5facbbd5..545973a3 100644 --- a/contrib/i2pd.conf +++ b/contrib/i2pd.conf @@ -280,8 +280,6 @@ verify = true [cpuext] ## Use CPU AES-NI instructions set when work with cryptography when available (default: true) # aesni = true -## Use CPU AVX instructions set when work with cryptography when available (default: true) -# avx = true ## Force usage of CPU instructions set, even if they not found (default: false) ## DO NOT TOUCH that option if you really don't know what are you doing! # force = false diff --git a/daemon/Daemon.cpp b/daemon/Daemon.cpp index 8e9721db..f04236fe 100644 --- a/daemon/Daemon.cpp +++ b/daemon/Daemon.cpp @@ -150,12 +150,11 @@ namespace util bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation); bool aesni; i2p::config::GetOption("cpuext.aesni", aesni); - bool avx; i2p::config::GetOption("cpuext.avx", avx); bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt); bool ssu; i2p::config::GetOption("ssu", ssu); if (!ssu && i2p::config::IsDefault ("precomputation.elgamal")) precomputation = false; // we don't elgamal table if no ssu, unless it's specified explicitly - i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt); + i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt); i2p::transport::InitAddressFromIface (); // get address4/6 from interfaces diff --git a/libi2pd/CPU.cpp b/libi2pd/CPU.cpp index 0804e2ac..d42f1ddd 100644 --- a/libi2pd/CPU.cpp +++ b/libi2pd/CPU.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2013-2020, The PurpleI2P Project +* Copyright (c) 2013-2023, The PurpleI2P Project * * This file is part of Purple i2pd project and licensed under BSD3 * @@ -7,52 +7,52 @@ */ #include "CPU.h" -#if defined(__x86_64__) || defined(__i386__) -#include -#endif #include "Log.h" +#if defined(_MSC_VER) +#include + #ifndef bit_AES -#define bit_AES (1 << 25) + #define bit_AES (1 << 25) #endif -#ifndef bit_AVX -#define bit_AVX (1 << 28) #endif - namespace i2p { namespace cpu { bool aesni = false; - bool avx = false; - void Detect(bool AesSwitch, bool AvxSwitch, bool force) + inline bool cpu_support_aes() { -#if defined(__x86_64__) || defined(__i386__) - int info[4]; - __cpuid(0, info[0], info[1], info[2], info[3]); - if (info[0] >= 0x00000001) { - __cpuid(0x00000001, info[0], info[1], info[2], info[3]); -#if defined (_WIN32) && (WINVER == 0x0501) // WinXP - if (AesSwitch && force) { // only if forced +#if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)) +#if defined(_MSC_VER) + int cpu_info[4]; + __cpuid(cpu_info, 1); + return ((cpu_info[2] & bit_AES) != 0); +#elif defined(__clang__) +#if __clang_major__ >= 6 + __builtin_cpu_init(); +#endif + return __builtin_cpu_supports("aes"); +#elif defined(__GNUC__) + __builtin_cpu_init(); + return __builtin_cpu_supports("aes"); #else - if ((info[2] & bit_AES && AesSwitch) || (AesSwitch && force)) { + return false; #endif - aesni = true; - } -#if defined (_WIN32) && (WINVER == 0x0501) // WinXP - if (AvxSwitch && force) { // only if forced #else - if ((info[2] & bit_AVX && AvxSwitch) || (AvxSwitch && force)) { + return false; #endif - avx = true; - } + } + + void Detect(bool AesSwitch, bool force) + { + if ((cpu_support_aes() && AesSwitch) || (AesSwitch && force)) { + aesni = true; } -#endif // defined(__x86_64__) || defined(__i386__) LogPrint(eLogInfo, "AESNI ", (aesni ? "enabled" : "disabled")); - LogPrint(eLogInfo, "AVX ", (avx ? "enabled" : "disabled")); } } } diff --git a/libi2pd/CPU.h b/libi2pd/CPU.h index f021bccb..a0695884 100644 --- a/libi2pd/CPU.h +++ b/libi2pd/CPU.h @@ -1,5 +1,5 @@ /* -* Copyright (c) 2013-2020, The PurpleI2P Project +* Copyright (c) 2013-2023, The PurpleI2P Project * * This file is part of Purple i2pd project and licensed under BSD3 * @@ -14,9 +14,8 @@ namespace i2p namespace cpu { extern bool aesni; - extern bool avx; - void Detect(bool AesSwitch, bool AvxSwitch, bool force); + void Detect(bool AesSwitch, bool force); } } diff --git a/libi2pd/Config.cpp b/libi2pd/Config.cpp index 6b515ef9..5551b010 100644 --- a/libi2pd/Config.cpp +++ b/libi2pd/Config.cpp @@ -193,7 +193,7 @@ namespace config { options_description precomputation("Precomputation options"); precomputation.add_options() ("precomputation.elgamal", -#if defined(__x86_64__) +#if (defined(_M_AMD64) || defined(__x86_64__)) value()->default_value(false), #else value()->default_value(true), @@ -308,7 +308,7 @@ namespace config { options_description cpuext("CPU encryption extensions options"); cpuext.add_options() ("cpuext.aesni", bool_switch()->default_value(true), "Use auto detection for AESNI CPU extensions. If false, AESNI will be not used") - ("cpuext.avx", bool_switch()->default_value(true), "Use auto detection for AVX CPU extensions. If false, AVX will be not used") + ("cpuext.avx", bool_switch()->default_value(false), "Deprecated option") ("cpuext.force", bool_switch()->default_value(false), "Force usage of CPU extensions. Useful when cpuinfo is not available on virtual machines") ; diff --git a/libi2pd/Crypto.cpp b/libi2pd/Crypto.cpp index 12087443..b5bc33d1 100644 --- a/libi2pd/Crypto.cpp +++ b/libi2pd/Crypto.cpp @@ -1,5 +1,5 @@ /* -* Copyright (c) 2013-2022, The PurpleI2P Project +* Copyright (c) 2013-2023, The PurpleI2P Project * * This file is part of Purple i2pd project and licensed under BSD3 * @@ -28,6 +28,12 @@ #include "I2PEndian.h" #include "Log.h" +#if defined(__AES__) && !defined(_MSC_VER) && ((defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__))) + #define SUPPORTS_AES 1 +#else + #define SUPPORTS_AES 0 +#endif + namespace i2p { namespace crypto @@ -361,7 +367,7 @@ namespace crypto BIGNUM * b1 = BN_CTX_get (ctx); BIGNUM * b = BN_CTX_get (ctx); // select random k -#if defined(__x86_64__) +#if (defined(_M_AMD64) || defined(__x86_64__)) BN_rand (k, ELGAMAL_FULL_EXPONENT_NUM_BITS, -1, 1); // full exponent for x64 #else BN_rand (k, ELGAMAL_SHORT_EXPONENT_NUM_BITS, -1, 1); // short exponent of 226 bits @@ -428,7 +434,7 @@ namespace crypto void GenerateElGamalKeyPair (uint8_t * priv, uint8_t * pub) { -#if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER) +#if (defined(_M_AMD64) || defined(__x86_64__)) || (defined(_M_IX86) || defined(__i386__)) || defined(_MSC_VER) RAND_bytes (priv, 256); #else // lower 226 bits (28 bytes and 2 bits) only. short exponent @@ -555,7 +561,7 @@ namespace crypto } // AES -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES #define KeyExpansion256(round0,round1) \ "pshufd $0xff, %%xmm2, %%xmm2 \n" \ "movaps %%xmm1, %%xmm4 \n" \ @@ -580,7 +586,7 @@ namespace crypto "movaps %%xmm3, "#round1"(%[sched]) \n" #endif -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES void ECBCryptoAESNI::ExpandKey (const AESKey& key) { __asm__ @@ -621,7 +627,7 @@ namespace crypto #endif -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES #define EncryptAES256(sched) \ "pxor (%["#sched"]), %%xmm0 \n" \ "aesenc 16(%["#sched"]), %%xmm0 \n" \ @@ -642,16 +648,18 @@ namespace crypto void ECBEncryption::Encrypt (const ChipherBlock * in, ChipherBlock * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - "movups (%[in]), %%xmm0 \n" - EncryptAES256(sched) - "movups %%xmm0, (%[out]) \n" - : : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" - ); + ( + "movups (%[in]), %%xmm0 \n" + EncryptAES256(sched) + "movups %%xmm0, (%[out]) \n" + : + : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) + : "%xmm0", "memory" + ); } else #endif @@ -660,7 +668,7 @@ namespace crypto } } -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES #define DecryptAES256(sched) \ "pxor 224(%["#sched"]), %%xmm0 \n" \ "aesdec 208(%["#sched"]), %%xmm0 \n" \ @@ -681,16 +689,18 @@ namespace crypto void ECBDecryption::Decrypt (const ChipherBlock * in, ChipherBlock * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - "movups (%[in]), %%xmm0 \n" - DecryptAES256(sched) - "movups %%xmm0, (%[out]) \n" - : : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) : "%xmm0", "memory" - ); + ( + "movups (%[in]), %%xmm0 \n" + DecryptAES256(sched) + "movups %%xmm0, (%[out]) \n" + : + : [sched]"r"(GetKeySchedule ()), [in]"r"(in), [out]"r"(out) + : "%xmm0", "memory" + ); } else #endif @@ -699,7 +709,7 @@ namespace crypto } } -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES #define CallAESIMC(offset) \ "movaps "#offset"(%[shed]), %%xmm0 \n" \ "aesimc %%xmm0, %%xmm0 \n" \ @@ -708,7 +718,7 @@ namespace crypto void ECBEncryption::SetKey (const AESKey& key) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { ExpandKey (key); @@ -722,28 +732,30 @@ namespace crypto void ECBDecryption::SetKey (const AESKey& key) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { ExpandKey (key); // expand encryption key first // then invert it using aesimc __asm__ - ( - CallAESIMC(16) - CallAESIMC(32) - CallAESIMC(48) - CallAESIMC(64) - CallAESIMC(80) - CallAESIMC(96) - CallAESIMC(112) - CallAESIMC(128) - CallAESIMC(144) - CallAESIMC(160) - CallAESIMC(176) - CallAESIMC(192) - CallAESIMC(208) - : : [shed]"r"(GetKeySchedule ()) : "%xmm0", "memory" - ); + ( + CallAESIMC(16) + CallAESIMC(32) + CallAESIMC(48) + CallAESIMC(64) + CallAESIMC(80) + CallAESIMC(96) + CallAESIMC(112) + CallAESIMC(128) + CallAESIMC(144) + CallAESIMC(160) + CallAESIMC(176) + CallAESIMC(192) + CallAESIMC(208) + : + : [shed]"r"(GetKeySchedule ()) + : "%xmm0", "memory" + ); } else #endif @@ -754,28 +766,28 @@ namespace crypto void CBCEncryption::Encrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - "movups (%[iv]), %%xmm1 \n" - "1: \n" - "movups (%[in]), %%xmm0 \n" - "pxor %%xmm1, %%xmm0 \n" - EncryptAES256(sched) - "movaps %%xmm0, %%xmm1 \n" - "movups %%xmm0, (%[out]) \n" - "add $16, %[in] \n" - "add $16, %[out] \n" - "dec %[num] \n" - "jnz 1b \n" - "movups %%xmm1, (%[iv]) \n" - : - : [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), - [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) - : "%xmm0", "%xmm1", "cc", "memory" - ); + ( + "movups (%[iv]), %%xmm1 \n" + "1: \n" + "movups (%[in]), %%xmm0 \n" + "pxor %%xmm1, %%xmm0 \n" + EncryptAES256(sched) + "movaps %%xmm0, %%xmm1 \n" + "movups %%xmm0, (%[out]) \n" + "add $16, %[in] \n" + "add $16, %[out] \n" + "dec %[num] \n" + "jnz 1b \n" + "movups %%xmm1, (%[iv]) \n" + : + : [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), + [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) + : "%xmm0", "%xmm1", "cc", "memory" + ); } else #endif @@ -799,22 +811,22 @@ namespace crypto void CBCEncryption::Encrypt (const uint8_t * in, uint8_t * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - "movups (%[iv]), %%xmm1 \n" - "movups (%[in]), %%xmm0 \n" - "pxor %%xmm1, %%xmm0 \n" - EncryptAES256(sched) - "movups %%xmm0, (%[out]) \n" - "movups %%xmm0, (%[iv]) \n" - : - : [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), - [in]"r"(in), [out]"r"(out) - : "%xmm0", "%xmm1", "memory" - ); + ( + "movups (%[iv]), %%xmm1 \n" + "movups (%[in]), %%xmm0 \n" + "pxor %%xmm1, %%xmm0 \n" + EncryptAES256(sched) + "movups %%xmm0, (%[out]) \n" + "movups %%xmm0, (%[iv]) \n" + : + : [iv]"r"((uint8_t *)m_LastBlock), [sched]"r"(m_ECBEncryption.GetKeySchedule ()), + [in]"r"(in), [out]"r"(out) + : "%xmm0", "%xmm1", "memory" + ); } else #endif @@ -823,29 +835,29 @@ namespace crypto void CBCDecryption::Decrypt (int numBlocks, const ChipherBlock * in, ChipherBlock * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - "movups (%[iv]), %%xmm1 \n" - "1: \n" - "movups (%[in]), %%xmm0 \n" - "movaps %%xmm0, %%xmm2 \n" - DecryptAES256(sched) - "pxor %%xmm1, %%xmm0 \n" - "movups %%xmm0, (%[out]) \n" - "movaps %%xmm2, %%xmm1 \n" - "add $16, %[in] \n" - "add $16, %[out] \n" - "dec %[num] \n" - "jnz 1b \n" - "movups %%xmm1, (%[iv]) \n" - : - : [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), - [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) - : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" - ); + ( + "movups (%[iv]), %%xmm1 \n" + "1: \n" + "movups (%[in]), %%xmm0 \n" + "movaps %%xmm0, %%xmm2 \n" + DecryptAES256(sched) + "pxor %%xmm1, %%xmm0 \n" + "movups %%xmm0, (%[out]) \n" + "movaps %%xmm2, %%xmm1 \n" + "add $16, %[in] \n" + "add $16, %[out] \n" + "dec %[num] \n" + "jnz 1b \n" + "movups %%xmm1, (%[iv]) \n" + : + : [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), + [in]"r"(in), [out]"r"(out), [num]"r"(numBlocks) + : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" + ); } else #endif @@ -869,22 +881,22 @@ namespace crypto void CBCDecryption::Decrypt (const uint8_t * in, uint8_t * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - "movups (%[iv]), %%xmm1 \n" - "movups (%[in]), %%xmm0 \n" - "movups %%xmm0, (%[iv]) \n" - DecryptAES256(sched) - "pxor %%xmm1, %%xmm0 \n" - "movups %%xmm0, (%[out]) \n" - : - : [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), - [in]"r"(in), [out]"r"(out) - : "%xmm0", "%xmm1", "memory" - ); + ( + "movups (%[iv]), %%xmm1 \n" + "movups (%[in]), %%xmm0 \n" + "movups %%xmm0, (%[iv]) \n" + DecryptAES256(sched) + "pxor %%xmm1, %%xmm0 \n" + "movups %%xmm0, (%[out]) \n" + : + : [iv]"r"((uint8_t *)m_IV), [sched]"r"(m_ECBDecryption.GetKeySchedule ()), + [in]"r"(in), [out]"r"(out) + : "%xmm0", "%xmm1", "memory" + ); } else #endif @@ -893,34 +905,34 @@ namespace crypto void TunnelEncryption::Encrypt (const uint8_t * in, uint8_t * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - // encrypt IV - "movups (%[in]), %%xmm0 \n" - EncryptAES256(sched_iv) - "movaps %%xmm0, %%xmm1 \n" - // double IV encryption - EncryptAES256(sched_iv) - "movups %%xmm0, (%[out]) \n" - // encrypt data, IV is xmm1 - "1: \n" - "add $16, %[in] \n" - "add $16, %[out] \n" - "movups (%[in]), %%xmm0 \n" - "pxor %%xmm1, %%xmm0 \n" - EncryptAES256(sched_l) - "movaps %%xmm0, %%xmm1 \n" - "movups %%xmm0, (%[out]) \n" - "dec %[num] \n" - "jnz 1b \n" - : - : [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.ECB().GetKeySchedule ()), - [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes - : "%xmm0", "%xmm1", "cc", "memory" - ); + ( + // encrypt IV + "movups (%[in]), %%xmm0 \n" + EncryptAES256(sched_iv) + "movaps %%xmm0, %%xmm1 \n" + // double IV encryption + EncryptAES256(sched_iv) + "movups %%xmm0, (%[out]) \n" + // encrypt data, IV is xmm1 + "1: \n" + "add $16, %[in] \n" + "add $16, %[out] \n" + "movups (%[in]), %%xmm0 \n" + "pxor %%xmm1, %%xmm0 \n" + EncryptAES256(sched_l) + "movaps %%xmm0, %%xmm1 \n" + "movups %%xmm0, (%[out]) \n" + "dec %[num] \n" + "jnz 1b \n" + : + : [sched_iv]"r"(m_IVEncryption.GetKeySchedule ()), [sched_l]"r"(m_LayerEncryption.ECB().GetKeySchedule ()), + [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes + : "%xmm0", "%xmm1", "cc", "memory" + ); } else #endif @@ -934,35 +946,35 @@ namespace crypto void TunnelDecryption::Decrypt (const uint8_t * in, uint8_t * out) { -#if defined(__AES__) && (defined(__x86_64__) || defined(__i386__)) +#if SUPPORTS_AES if(i2p::cpu::aesni) { __asm__ - ( - // decrypt IV - "movups (%[in]), %%xmm0 \n" - DecryptAES256(sched_iv) - "movaps %%xmm0, %%xmm1 \n" - // double IV encryption - DecryptAES256(sched_iv) - "movups %%xmm0, (%[out]) \n" - // decrypt data, IV is xmm1 - "1: \n" - "add $16, %[in] \n" - "add $16, %[out] \n" - "movups (%[in]), %%xmm0 \n" - "movaps %%xmm0, %%xmm2 \n" - DecryptAES256(sched_l) - "pxor %%xmm1, %%xmm0 \n" - "movups %%xmm0, (%[out]) \n" - "movaps %%xmm2, %%xmm1 \n" - "dec %[num] \n" - "jnz 1b \n" - : - : [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.ECB().GetKeySchedule ()), - [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes - : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" - ); + ( + // decrypt IV + "movups (%[in]), %%xmm0 \n" + DecryptAES256(sched_iv) + "movaps %%xmm0, %%xmm1 \n" + // double IV encryption + DecryptAES256(sched_iv) + "movups %%xmm0, (%[out]) \n" + // decrypt data, IV is xmm1 + "1: \n" + "add $16, %[in] \n" + "add $16, %[out] \n" + "movups (%[in]), %%xmm0 \n" + "movaps %%xmm0, %%xmm2 \n" + DecryptAES256(sched_l) + "pxor %%xmm1, %%xmm0 \n" + "movups %%xmm0, (%[out]) \n" + "movaps %%xmm2, %%xmm1 \n" + "dec %[num] \n" + "jnz 1b \n" + : + : [sched_iv]"r"(m_IVDecryption.GetKeySchedule ()), [sched_l]"r"(m_LayerDecryption.ECB().GetKeySchedule ()), + [in]"r"(in), [out]"r"(out), [num]"r"(63) // 63 blocks = 1008 bytes + : "%xmm0", "%xmm1", "%xmm2", "cc", "memory" + ); } else #endif @@ -1285,9 +1297,9 @@ namespace crypto } }*/ - void InitCrypto (bool precomputation, bool aesni, bool avx, bool force) + void InitCrypto (bool precomputation, bool aesni, bool force) { - i2p::cpu::Detect (aesni, avx, force); + i2p::cpu::Detect (aesni, force); #if LEGACY_OPENSSL SSL_library_init (); #endif @@ -1297,7 +1309,7 @@ namespace crypto CRYPTO_set_locking_callback (OpensslLockingCallback);*/ if (precomputation) { -#if defined(__x86_64__) +#if (defined(_M_AMD64) || defined(__x86_64__)) g_ElggTable = new BIGNUM * [ELGAMAL_FULL_EXPONENT_NUM_BYTES][255]; PrecalculateElggTable (g_ElggTable, ELGAMAL_FULL_EXPONENT_NUM_BYTES); #else @@ -1312,7 +1324,7 @@ namespace crypto if (g_ElggTable) { DestroyElggTable (g_ElggTable, -#if defined(__x86_64__) +#if (defined(_M_AMD64) || defined(__x86_64__)) ELGAMAL_FULL_EXPONENT_NUM_BYTES #else ELGAMAL_SHORT_EXPONENT_NUM_BYTES diff --git a/libi2pd/Crypto.h b/libi2pd/Crypto.h index d4cbda97..1419293b 100644 --- a/libi2pd/Crypto.h +++ b/libi2pd/Crypto.h @@ -307,7 +307,7 @@ namespace crypto void InitNoiseIKState (NoiseSymmetricState& state, const uint8_t * pub); // Noise_IK (ratchets) // init and terminate - void InitCrypto (bool precomputation, bool aesni, bool avx, bool force); + void InitCrypto (bool precomputation, bool aesni, bool force); void TerminateCrypto (); } } diff --git a/libi2pd/Identity.cpp b/libi2pd/Identity.cpp index a4a9f716..3a659c11 100644 --- a/libi2pd/Identity.cpp +++ b/libi2pd/Identity.cpp @@ -803,29 +803,12 @@ namespace data XORMetric operator^(const IdentHash& key1, const IdentHash& key2) { XORMetric m; -#if (defined(__x86_64__) || defined(__i386__)) && defined(__AVX__) // not all X86 targets supports AVX (like old Pentium, see #1600) - if(i2p::cpu::avx) - { - __asm__ - ( - "vmovups %1, %%ymm0 \n" - "vmovups %2, %%ymm1 \n" - "vxorps %%ymm0, %%ymm1, %%ymm1 \n" - "vmovups %%ymm1, %0 \n" - : "=m"(*m.metric) - : "m"(*key1), "m"(*key2) - : "memory", "%xmm0", "%xmm1" // should be replaced by %ymm0/1 once supported by compiler - ); - } - else -#endif - { - const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL (); - m.metric_ll[0] = hash1[0] ^ hash2[0]; - m.metric_ll[1] = hash1[1] ^ hash2[1]; - m.metric_ll[2] = hash1[2] ^ hash2[2]; - m.metric_ll[3] = hash1[3] ^ hash2[3]; - } + + const uint64_t * hash1 = key1.GetLL (), * hash2 = key2.GetLL (); + m.metric_ll[0] = hash1[0] ^ hash2[0]; + m.metric_ll[1] = hash1[1] ^ hash2[1]; + m.metric_ll[2] = hash1[2] ^ hash2[2]; + m.metric_ll[3] = hash1[3] ^ hash2[3]; return m; } diff --git a/libi2pd/api.cpp b/libi2pd/api.cpp index 3bfb801c..ad24519f 100644 --- a/libi2pd/api.cpp +++ b/libi2pd/api.cpp @@ -38,9 +38,8 @@ namespace api bool precomputation; i2p::config::GetOption("precomputation.elgamal", precomputation); bool aesni; i2p::config::GetOption("cpuext.aesni", aesni); - bool avx; i2p::config::GetOption("cpuext.avx", avx); bool forceCpuExt; i2p::config::GetOption("cpuext.force", forceCpuExt); - i2p::crypto::InitCrypto (precomputation, aesni, avx, forceCpuExt); + i2p::crypto::InitCrypto (precomputation, aesni, forceCpuExt); int netID; i2p::config::GetOption("netid", netID); i2p::context.SetNetID (netID);