Skip to content

Commit

Permalink
Detect when AVX is disabled via OSXSAVE (#1182)
Browse files Browse the repository at this point in the history
**Issue #:**
internal V1647663988

[SEV-SNP enabled EC2 instances](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/sev-snp.html) crash when using AWS CLI to upload with checksum algorithm CRC64NVME. [AWS CLI v2](https://github.com/aws/aws-cli/tree/v2) uses aws-c-common under the hood to do CRC64NVME.

**Investigation:**
aws-c-common has checks for AVX support, using [CPUID](https://en.wikipedia.org/wiki/CPUID). But apparently, just because CPUID reports that it can do AVX, doesn't necessarily mean it's enabled by the OS. On SEV-SNP enabled instances, AVX is not allowed.

GCC had the same bug until 7.4: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100

Stackoverflow question: https://stackoverflow.com/questions/72522885/are-the-xgetbv-and-cpuid-checks-sufficient-to-guarantee-avx2-support

**Description of changes:**
* Do further checks before deciding that AVX can be used.
* Move all Intel feature detection into 1 function.
   * This is simpler, it's how we're doing feature detection on some other architectures.
   * Add checks to find the max value that CPUID accepts. We weren't doing this before.
  • Loading branch information
graebm authored Jan 23, 2025
1 parent 0c79778 commit f6622ac
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 102 deletions.
11 changes: 11 additions & 0 deletions source/arch/intel/asm/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,14 @@ void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd) {
abcd[2] = ecx;
abcd[3] = edx;
}

uint64_t aws_run_xgetbv(uint32_t xcr) {
/* NOTE: we could have used the _xgetbv() intrinsic in <immintrin.h>, but it's missing from GCC < 9.0:
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=71659 */

/* xgetbv writes high and low of 64bit value to EAX:EDX */
uint32_t eax;
uint32_t edx;
__asm__ __volatile__("xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
return (((uint64_t)eax) << 32) | edx;
}
170 changes: 68 additions & 102 deletions source/arch/intel/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,57 +13,70 @@
#include <stdlib.h>

extern void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd);
extern uint64_t aws_run_xgetbv(uint32_t xcr);

typedef bool(has_feature_fn)(void);
static bool s_cpu_features[AWS_CPU_FEATURE_COUNT];
static bool s_cpu_features_cached;

static bool s_has_clmul(void) {
static void s_cache_cpu_features(void) {
/***************************************************************************
* First, find the max EAX value we can pass to CPUID without undefined behavior
* https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=0:_Highest_Function_Parameter_and_Manufacturer_ID
**************************************************************************/
uint32_t abcd[4];
uint32_t clmul_mask = 0x00000002;
aws_run_cpuid(1, 0, abcd);

if ((abcd[2] & clmul_mask) != clmul_mask)
return false;

return true;
}

static bool s_has_sse41(void) {
uint32_t abcd[4];
uint32_t sse41_mask = 0x00080000;
aws_run_cpuid(1, 0, abcd);

if ((abcd[2] & sse41_mask) != sse41_mask)
return false;

return true;
}

static bool s_has_sse42(void) {
uint32_t abcd[4];
uint32_t sse42_mask = 0x00100000;
aws_run_cpuid(1, 0, abcd);

if ((abcd[2] & sse42_mask) != sse42_mask)
return false;

return true;
}
aws_run_cpuid(0x0, 0x0, abcd);
const uint32_t max_cpuid_eax_value = abcd[0]; /* max-value = EAX */

/**************************************************************************
* CPUID(EAX=1H, ECX=0H): Processor Info and Feature Bits
* https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=1:_Processor_Info_and_Feature_Bits
**************************************************************************/
if (0x1 > max_cpuid_eax_value) {
return;
}
aws_run_cpuid(0x1, 0x0, abcd);
s_cpu_features[AWS_CPU_FEATURE_CLMUL] = abcd[2] & (1 << 1); /* pclmulqdq = ECX[bit 1] */
s_cpu_features[AWS_CPU_FEATURE_SSE_4_1] = abcd[2] & (1 << 19); /* sse4.1 = ECX[bit 19] */
s_cpu_features[AWS_CPU_FEATURE_SSE_4_2] = abcd[2] & (1 << 20); /* sse4.2 = ECX[bit 20] */

/* NOTE: Even if the AVX flag is set, it's not necessarily usable.
* We need to check that OSXSAVE is enabled, and check further capabilities via XGETBV.
* GCC had the same bug until 7.4: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100 */
bool avx_usable = false;
bool avx512_usable = false;
bool feature_osxsave = abcd[2] & (1 << 27); /* osxsave = ECX[bit 27] */
if (feature_osxsave) {
/* Check XCR0 (Extended Control Register 0) via XGETBV
* https://en.wikipedia.org/w/index.php?title=Control_register&oldid=1268423710#XCR0_and_XSS */
uint64_t xcr0 = aws_run_xgetbv(0);
const uint64_t avx_mask = (1 << 1) /* SSE = XCR0[bit 1] */
| (1 << 2) /* AVX = XCR0[bit 2] */;
avx_usable = (xcr0 & avx_mask) == avx_mask;

const uint64_t avx512_mask = (1 << 5) /* OPMASK = XCR0[bit 5] */
| (1 << 6) /* ZMM_Hi256 = XCR0[bit 6] */
| (1 << 7) /* Hi16_ZMM = XCR0[bit 7] */
| avx_mask;
avx512_usable = (xcr0 & avx512_mask) == avx512_mask;
}

static bool s_has_avx2(void) {
uint32_t abcd[4];
bool feature_avx = false;
if (avx_usable) {
feature_avx = abcd[2] & (1 << 28); /* avx = ECX[bit 28] */
}

/* Check AVX2:
* CPUID.(EAX=07H, ECX=0H):EBX.AVX2[bit 5]==1 */
uint32_t avx2_mask = (1 << 5);
aws_run_cpuid(7, 0, abcd);
if ((abcd[1] & avx2_mask) != avx2_mask) {
return false;
/***************************************************************************
* CPUID(EAX=7H, ECX=0H): Extended Features
* https://en.wikipedia.org/w/index.php?title=CPUID&oldid=1270569388#EAX=7,_ECX=0:_Extended_Features
**************************************************************************/
if (0x7 > max_cpuid_eax_value) {
return;
}
aws_run_cpuid(0x7, 0x0, abcd);
s_cpu_features[AWS_CPU_FEATURE_BMI2] = abcd[1] & (1 << 8); /* bmi2 = EBX[bit 8] */
s_cpu_features[AWS_CPU_FEATURE_VPCLMULQDQ] = abcd[2] & (1 << 10); /* vpclmulqdq = ECX[bit 10] */

/* Also check AVX:
* CPUID.(EAX=01H, ECX=0H):ECX.AVX[bit 28]==1
*
* NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
/* NOTE: It SHOULD be impossible for a CPU to support AVX2 without supporting AVX.
* But we've received crash reports where the AVX2 feature check passed
* and then an AVX instruction caused an "invalid instruction" crash.
*
Expand All @@ -76,69 +89,22 @@ static bool s_has_avx2(void) {
*
* We don't know for sure what was up with those machines, but this extra
* check should stop them from running our AVX/AVX2 code paths. */
uint32_t avx1_mask = (1 << 28);
aws_run_cpuid(1, 0, abcd);
if ((abcd[2] & avx1_mask) != avx1_mask) {
return false;
}

return true;
}

static bool s_has_avx512(void) {
uint32_t abcd[4];

/* Check AVX512F:
* CPUID.(EAX=07H, ECX=0H):EBX.AVX512[bit 16]==1 */
uint32_t avx512_mask = (1 << 16);
aws_run_cpuid(7, 0, abcd);
if ((abcd[1] & avx512_mask) != avx512_mask) {
return false;
}

return true;
}

static bool s_has_bmi2(void) {
uint32_t abcd[4];

/* Check BMI2:
* CPUID.(EAX=07H, ECX=0H):EBX.BMI2[bit 8]==1 */
uint32_t bmi2_mask = (1 << 8);
aws_run_cpuid(7, 0, abcd);
if ((abcd[1] & bmi2_mask) != bmi2_mask) {
return false;
if (feature_avx) {
if (avx512_usable) {
s_cpu_features[AWS_CPU_FEATURE_AVX512] = abcd[1] & (1 << 16); /* AVX-512 Foundation = EBX[bit 16] */
}
}

return true;
}

static bool s_has_vpclmulqdq(void) {
uint32_t abcd[4];
/* Check VPCLMULQDQ:
* CPUID.(EAX=07H, ECX=0H):ECX.VPCLMULQDQ[bit 10]==1 */
uint32_t vpclmulqdq_mask = (1 << 10);
aws_run_cpuid(7, 0, abcd);
if ((abcd[2] & vpclmulqdq_mask) != vpclmulqdq_mask) {
return false;
bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
/* Look up and cache all hardware features the first time this is called */
if (AWS_UNLIKELY(!s_cpu_features_cached)) {
s_cache_cpu_features();
s_cpu_features_cached = true;
}
return true;
}

has_feature_fn *s_check_cpu_feature[AWS_CPU_FEATURE_COUNT] = {
[AWS_CPU_FEATURE_CLMUL] = s_has_clmul,
[AWS_CPU_FEATURE_SSE_4_1] = s_has_sse41,
[AWS_CPU_FEATURE_SSE_4_2] = s_has_sse42,
[AWS_CPU_FEATURE_AVX2] = s_has_avx2,
[AWS_CPU_FEATURE_AVX512] = s_has_avx512,
[AWS_CPU_FEATURE_BMI2] = s_has_bmi2,
[AWS_CPU_FEATURE_VPCLMULQDQ] = s_has_vpclmulqdq,
};

bool aws_cpu_has_feature(enum aws_cpu_feature_name feature_name) {
if (s_check_cpu_feature[feature_name])
return s_check_cpu_feature[feature_name]();
return false;
AWS_ASSERT(feature_name >= 0 && feature_name < AWS_CPU_FEATURE_COUNT);
return s_cpu_features[feature_name];
}

#define CPUID_AVAILABLE 0
Expand Down
5 changes: 5 additions & 0 deletions source/arch/intel/msvc/cpuid.c
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@

#include <aws/common/cpuid.h>

#include <immintrin.h>
#include <intrin.h>

void aws_run_cpuid(uint32_t eax, uint32_t ecx, uint32_t *abcd) {
__cpuidex((int32_t *)abcd, eax, ecx);
}

uint64_t aws_run_xgetbv(uint32_t xcr) {
return _xgetbv(xcr);
}

0 comments on commit f6622ac

Please sign in to comment.