diff --git a/CMakeLists.txt b/CMakeLists.txt
index 680126d..4c4d565 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,6 +96,7 @@ add_executable(${PROJECT_NAME}
     src/screen_service.hpp src/screen_service.cpp
     src/config.cpp src/config.hpp
     src/utils.cpp src/utils.hpp
+    src/cpu.cpp src/cpu.hpp
     src/initcpio.cpp src/initcpio.hpp
     src/disk.cpp src/disk.hpp
     src/drivers.cpp src/drivers.hpp
diff --git a/meson.build b/meson.build
index cf3522c..d457b77 100644
--- a/meson.build
+++ b/meson.build
@@ -63,6 +63,7 @@ src_files = files(
     'src/screen_service.hpp', 'src/screen_service.cpp',
     'src/config.cpp', 'src/config.hpp',
     'src/utils.cpp', 'src/utils.hpp',
+    'src/cpu.cpp', 'src/cpu.hpp',
     'src/initcpio.cpp', 'src/initcpio.hpp',
     'src/disk.cpp', 'src/disk.hpp',
     'src/drivers.cpp', 'src/drivers.hpp',
diff --git a/src/cpu.cpp b/src/cpu.cpp
new file mode 100644
index 0000000..e3d1242
--- /dev/null
+++ b/src/cpu.cpp
@@ -0,0 +1,163 @@
+#include "cpu.hpp"
+
+#include <sys/utsname.h>  // for uname
+
+#include <cstdint>  // for uint32_t, uint64_t, int32_t
+
+#include <spdlog/spdlog.h>
+
+namespace utils {
+
+namespace {
+
+#if defined(__x86_64__) || defined(__i386__)
+// The raw instruction wrappers are x86-only; guard them like their sole caller
+// so that other architectures still compile this file.
+
+/// Executes XGETBV with ECX = 0 and returns the result as (EDX << 32) | EAX.
+static uint64_t xgetbv(void) noexcept {
+    uint32_t eax{};
+    uint32_t edx{};
+    __asm__ __volatile__("xgetbv\n"
+                         : "=a"(eax), "=d"(edx)
+                         : "c"(0));
+    return (static_cast<uint64_t>(edx) << 32) | eax;
+}
+
+/// Executes CPUID with EAX = id; out[0..3] receive EAX, EBX, ECX, EDX.
+static void cpuid(uint32_t out[4], uint32_t id) noexcept {
+    __asm__ __volatile__("cpuid\n"
+                         : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+                         : "a"(id));
+}
+
+/// Executes CPUID with EAX = id and ECX = sid; out[0..3] receive EAX, EBX, ECX, EDX.
+static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) noexcept {
+    __asm__ __volatile__("cpuid\n"
+                         : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
+                         : "a"(id), "c"(sid));
+}
+#endif
+
+/// Bit flags returned by get_cpu_features(); UNDEFINED means "not probed yet".
+enum cpu_feature {
+    SSE2     = 1 << 0,
+    SSSE3    = 1 << 1,
+    SSE41    = 1 << 2,
+    SSE42    = 1 << 3,
+    AVX      = 1 << 4,
+    AVX2     = 1 << 5,
+    AVX512F  = 1 << 6,
+    AVX512BW = 1 << 7,
+    AVX512CD = 1 << 8,
+    AVX512DQ = 1 << 9,
+    AVX512VL = 1 << 10,
+    /* ... */
+    UNDEFINED = 1 << 30
+};
+
+/// Cached result of get_cpu_features(); stays UNDEFINED until the first x86 probe.
+static int32_t g_cpu_features = UNDEFINED;
+
+/// Returns the cpu_feature bitmask of the running CPU, probing it on first use.
+/// Builds for non-x86 targets always return 0.
+static int32_t get_cpu_features() noexcept {
+    if (g_cpu_features != UNDEFINED) {
+        return g_cpu_features;
+    }
+#if defined(__x86_64__) || defined(__i386__)
+    uint32_t regs[4]{};
+    uint32_t *eax{&regs[0]}, *ebx{&regs[1]}, *ecx{&regs[2]}, *edx{&regs[3]};
+    static_cast<void>(edx);
+    int32_t features = 0;
+    cpuid(regs, 0);
+    const int32_t max_id = static_cast<int32_t>(*eax);
+    cpuid(regs, 1);
+#if defined(__amd64__)
+    features |= SSE2;
+#else
+    if (*edx & (1UL << 26))
+        features |= SSE2;
+#endif
+    // CPUID.1:ECX bit 9 is SSSE3; bit 0 is the older SSE3.
+    if (*ecx & (1UL << 9))
+        features |= SSSE3;
+    if (*ecx & (1UL << 19))
+        features |= SSE41;
+    if (*ecx & (1UL << 20))
+        features |= SSE42;
+
+    if (*ecx & (1UL << 27)) {  // OSXSAVE
+        const uint64_t mask = xgetbv();
+        if ((mask & 6) == 6) {  // SSE and AVX states
+            if (*ecx & (1UL << 28))
+                features |= AVX;
+            if (max_id >= 7) {
+                cpuidex(regs, 7, 0);
+                if (*ebx & (1UL << 5))
+                    features |= AVX2;
+                if ((mask & 224) == 224) {  // Opmask, ZMM_Hi256, Hi16_Zmm
+                    if (*ebx & (1UL << 31))
+                        features |= AVX512VL;
+                    if (*ebx & (1UL << 16))
+                        features |= AVX512F;
+                    if (*ebx & (1UL << 30))
+                        features |= AVX512BW;
+                    if (*ebx & (1UL << 28))
+                        features |= AVX512CD;
+                    if (*ebx & (1UL << 17))
+                        features |= AVX512DQ;
+                }
+            }
+        }
+    }
+    g_cpu_features = features;
+    return features;
+#else
+    /* How to detect NEON? */
+    return 0;
+#endif
+}
+
+}  // namespace
+
+/// Returns the machine name from uname() (when that call succeeds) followed by
+/// each supported level among "x86_64_v2", "x86_64_v3" and "x86_64_v4".
+auto get_isa_levels() noexcept -> std::vector<std::string> {
+    std::vector<std::string> supported_isa_levels;
+
+    {
+        struct utsname un{};
+        // uname() does not promise to fill `un` on failure, so use it only on success.
+        if (uname(&un) == 0) {
+            supported_isa_levels.push_back(std::string{un.machine});
+        }
+    }
+
+    const int32_t features = get_cpu_features();
+
+    /* Check for x86_64_v2 */
+    const bool supports_v2 = (features & SSSE3) && (features & SSE41) && (features & SSE42);
+    if (supports_v2) {
+        spdlog::info("cpu supports x86_64_v2");
+        supported_isa_levels.push_back("x86_64_v2");
+    }
+
+    /* Check for x86_64_v3 */
+    const bool supports_v3 = (supports_v2) && (features & AVX) && (features & AVX2);
+    if (supports_v3) {
+        spdlog::info("cpu supports x86_64_v3");
+        supported_isa_levels.push_back("x86_64_v3");
+    }
+
+    /* Check for x86_64_v4 */
+    const bool supports_v4 = (supports_v3) && (features & AVX512F) && (features & AVX512VL) && (features & AVX512BW) && (features & AVX512CD) && (features & AVX512DQ);
+    if (supports_v4) {
+        spdlog::info("cpu supports x86_64_v4");
+        supported_isa_levels.push_back("x86_64_v4");
+    }
+
+    return supported_isa_levels;
+}
+
+}  // namespace utils
diff --git a/src/cpu.hpp b/src/cpu.hpp
new file mode 100644
index 0000000..c4d6b93
--- /dev/null
+++ b/src/cpu.hpp
@@ -0,0 +1,15 @@
+#ifndef CPU_HPP
+#define CPU_HPP
+
+#include <string>  // for string
+#include <vector>  // for vector
+
+namespace utils {
+
+/// Returns the uname() machine name plus the x86-64 micro-architecture levels
+/// ("x86_64_v2", "x86_64_v3", "x86_64_v4") supported by the running CPU.
+auto get_isa_levels() noexcept -> std::vector<std::string>;
+
+}  // namespace utils
+
+#endif  // CPU_HPP
diff --git a/src/utils.cpp b/src/utils.cpp
index 7e05cd5..9b658f5 100644
--- a/src/utils.cpp
+++ b/src/utils.cpp
@@ -1,5 +1,6 @@
 #include "utils.hpp"
 #include "config.hpp"
+#include "cpu.hpp"
 #include "definitions.hpp"
 #include "follow_process_log.hpp"
 #include "initcpio.hpp"
@@ -35,6 +36,7 @@
 #pragma GCC diagnostic ignored "-Wold-style-cast"
 #endif
 
+#include <range/v3/algorithm/contains.hpp>
 #include
 #include
 #include
@@ -1488,8 +1490,9 @@ void id_system() noexcept {
 }
 
 void try_v3() noexcept {
-    const auto& ret_status = utils::exec("/lib/ld-linux-x86-64.so.2 --help|grep 'x86-64-v3 ('|awk '{print $1}' 2> /dev/null");
-    if (ret_status != "x86-64-v3") {
+    // get_isa_levels() spells levels with underscores, e.g. "x86_64_v3".
+    const auto& isa_levels = utils::get_isa_levels();
+    if (!ranges::contains(isa_levels, "x86_64_v3")) {
         spdlog::warn("x86-64-v3 is not supported");
         return;
     }