Skip to content

Commit

Permalink
Add POPCNT detection.
Browse files Browse the repository at this point in the history
  • Loading branch information
klauspost committed Jun 15, 2015
1 parent 0623620 commit 9c472e6
Show file tree
Hide file tree
Showing 5 changed files with 123 additions and 50 deletions.
114 changes: 64 additions & 50 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,56 +16,70 @@ Package home: https://github.com/klauspost/cpuid
[4]: https://travis-ci.org/klauspost/cpuid

# features
Currently these CPU features are detected:
* **CMOV**; (i686 CMOV)
* **NX**; (NX (No-Execute) bit)
* **AMD3DNOW**; (AMD 3DNOW)
* **AMD3DNOWEXT**; (AMD 3DNowExt)
* **MMX**; (standard MMX)
* **MMXEXT**; (SSE integer functions or AMD MMX ext)
* **SSE**; (SSE functions)
* **SSE2**; (P4 SSE functions)
* **SSE3**; (Prescott SSE3 functions)
* **SSSE3**; (Conroe SSSE3 functions)
* **SSE4**; (Penryn SSE4.1 functions)
* **SSE4A**; (AMD Barcelona microarchitecture SSE4a instructions)
* **SSE42**; (Nehalem SSE4.2 functions)
* **AVX**; (AVX functions)
* **AVX2**; (AVX2 functions)
* **FMA3**; (Intel FMA 3)
* **FMA4**; (Bulldozer FMA4 functions)
* **XOP**; (Bulldozer XOP functions)
* **F16C**; (Half-precision floating-point conversion)
* **BMI1**; (Bit Manipulation Instruction Set 1)
* **BMI2**; (Bit Manipulation Instruction Set 2)
* **TBM**; (AMD Trailing Bit Manipulation)
* **LZCNT**; (LZCNT instruction)
* **AESNI**; (Advanced Encryption Standard New Instructions)
* **CLMUL**; (Carry-less Multiplication)
* **HTT**; (Hyperthreading (enabled))
* **HLE**; (Hardware Lock Elision)
* **RTM**; (Restricted Transactional Memory)
* **RDRAND**; (RDRAND instruction is available)
* **RDSEED**; (RDSEED instruction is available)
* **ADX**; (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
* **SHA**; (Intel SHA Extensions)
* **AVX512F**; (AVX-512 Foundation)
* **AVX512DQ**; (AVX-512 Doubleword and Quadword Instructions)
* **AVX512IFMA**; (AVX-512 Integer Fused Multiply-Add Instructions)
* **AVX512PF**; (AVX-512 Prefetch Instructions)
* **AVX512ER**; (AVX-512 Exponential and Reciprocal Instructions)
* **AVX512CD**; (AVX-512 Conflict Detection Instructions)
* **AVX512BW**; (AVX-512 Byte and Word Instructions)
* **AVX512VL**; (AVX-512 Vector Length Extensions)
* **AVX512VBMI**; (AVX-512 Vector Bit Manipulation Instructions)
* **MPX**; (Intel MPX (Memory Protection Extensions))
* **ERMS**; (Enhanced REP MOVSB/STOSB)
* **RDTSCP**; (RDTSCP Instruction)
* **CX16**; (CMPXCHG16B Instruction)

* **SSE2SLOW**; (SSE2 is supported, but usually not faster)
* **SSE3SLOW**; (SSE3 is supported, but usually not faster)
* **ATOM**; (Atom processor, some SSSE3 instructions are slower)
## CPU Instructions
* **CMOV** (i686 CMOV)
* **NX** (NX (No-Execute) bit)
* **AMD3DNOW** (AMD 3DNOW)
* **AMD3DNOWEXT** (AMD 3DNowExt)
* **MMX** (standard MMX)
* **MMXEXT** (SSE integer functions or AMD MMX ext)
* **SSE** (SSE functions)
* **SSE2** (P4 SSE functions)
* **SSE3** (Prescott SSE3 functions)
* **SSSE3** (Conroe SSSE3 functions)
* **SSE4** (Penryn SSE4.1 functions)
* **SSE4A** (AMD Barcelona microarchitecture SSE4a instructions)
* **SSE42** (Nehalem SSE4.2 functions)
* **AVX** (AVX functions)
* **AVX2** (AVX2 functions)
* **FMA3** (Intel FMA 3)
* **FMA4** (Bulldozer FMA4 functions)
* **XOP** (Bulldozer XOP functions)
* **F16C** (Half-precision floating-point conversion)
* **BMI1** (Bit Manipulation Instruction Set 1)
* **BMI2** (Bit Manipulation Instruction Set 2)
* **TBM** (AMD Trailing Bit Manipulation)
* **LZCNT** (LZCNT instruction)
* **POPCNT** (POPCNT instruction)
* **AESNI** (Advanced Encryption Standard New Instructions)
* **CLMUL** (Carry-less Multiplication)
* **HTT** (Hyperthreading (enabled))
* **HLE** (Hardware Lock Elision)
* **RTM** (Restricted Transactional Memory)
* **RDRAND** (RDRAND instruction is available)
* **RDSEED** (RDSEED instruction is available)
* **ADX** (Intel ADX (Multi-Precision Add-Carry Instruction Extensions))
* **SHA** (Intel SHA Extensions)
* **AVX512F** (AVX-512 Foundation)
* **AVX512DQ** (AVX-512 Doubleword and Quadword Instructions)
* **AVX512IFMA** (AVX-512 Integer Fused Multiply-Add Instructions)
* **AVX512PF** (AVX-512 Prefetch Instructions)
* **AVX512ER** (AVX-512 Exponential and Reciprocal Instructions)
* **AVX512CD** (AVX-512 Conflict Detection Instructions)
* **AVX512BW** (AVX-512 Byte and Word Instructions)
* **AVX512VL** (AVX-512 Vector Length Extensions)
* **AVX512VBMI** (AVX-512 Vector Bit Manipulation Instructions)
* **MPX** (Intel MPX (Memory Protection Extensions))
* **ERMS** (Enhanced REP MOVSB/STOSB)
* **RDTSCP** (RDTSCP Instruction)
* **CX16** (CMPXCHG16B Instruction)

## Performance
* **SSE2SLOW** (SSE2 is supported, but usually not faster)
* **SSE3SLOW** (SSE3 is supported, but usually not faster)
* **ATOM** (Atom processor, some SSSE3 instructions are slower)
* **Cache line** (Probable size of a cache line)

## CPU Vendor/VM
* **Intel**
* **AMD**
* **VIA**
* **Transmeta**
* **NSC**
* **KVM** (Kernel-based Virtual Machine)
* **MSVM** (Microsoft Hyper-V or Windows Virtual PC)
* **VMware**
* **XenHVM**

# installing

Expand Down
10 changes: 10 additions & 0 deletions cpuid.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ const (
BMI2 // Bit Manipulation Instruction Set 2
TBM // AMD Trailing Bit Manipulation
LZCNT // LZCNT instruction
POPCNT // POPCNT instruction
AESNI // Advanced Encryption Standard New Instructions
CLMUL // Carry-less Multiplication
HTT // Hyperthreading (enabled)
Expand Down Expand Up @@ -104,6 +105,7 @@ var flagNames = map[Flags]string{
BMI2: "BMI2", // Bit Manipulation Instruction Set 2
TBM: "TBM", // AMD Trailing Bit Manipulation
LZCNT: "LZCNT", // LZCNT instruction
POPCNT: "POPCNT", // POPCNT instruction
AESNI: "AESNI", // Advanced Encryption Standard New Instructions
CLMUL: "CLMUL", // Carry-less Multiplication
HTT: "HTT", // Hyperthreading (enabled)
Expand Down Expand Up @@ -290,6 +292,11 @@ func (c CPUInfo) Lzcnt() bool {
return c.Features&LZCNT != 0
}

// Popcnt indicates support of the POPCNT instruction.
// It reports whether the POPCNT flag is set in c.Features.
func (c CPUInfo) Popcnt() bool {
return c.Features&POPCNT != 0
}

// HTT indicates the processor has Hyperthreading enabled
func (c CPUInfo) HTT() bool {
return c.Features&HTT != 0
Expand Down Expand Up @@ -675,6 +682,9 @@ func support() Flags {
if (c & (1 << 1)) != 0 {
rval |= CLMUL
}
if c&(1<<23) != 0 {
rval |= POPCNT
}
if c&(1<<30) != 0 {
rval |= RDRAND
}
Expand Down
10 changes: 10 additions & 0 deletions cpuid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,16 @@ func TestLzcnt(t *testing.T) {
t.Log("LZCNT Support:", got)
}

// TestPopcnt tests Popcnt() function.
// It checks that CPU.Popcnt() agrees with testing the POPCNT bit of
// CPU.Features directly, and logs the detected value.
func TestPopcnt(t *testing.T) {
got := CPU.Popcnt()
expected := CPU.Features&POPCNT == POPCNT
if got != expected {
t.Fatalf("Popcnt: expected %v, got %v", expected, got)
}
t.Log("POPCNT Support:", got)
}

// TestAesNi tests AesNi() function
func TestAesNi(t *testing.T) {
got := CPU.AesNi()
Expand Down
19 changes: 19 additions & 0 deletions private/cpuid.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ const (
bmi2 // Bit Manipulation Instruction Set 2
tbm // AMD Trailing Bit Manipulation
lzcnt // LZCNT instruction
popcnt // POPCNT instruction
aesni // Advanced Encryption Standard New Instructions
clmul // Carry-less Multiplication
htt // Hyperthreading (enabled)
Expand All @@ -67,6 +68,7 @@ const (
mpx // Intel MPX (Memory Protection Extensions)
erms // Enhanced REP MOVSB/STOSB
rdtscp // RDTSCP Instruction
cx16 // CMPXCHG16B Instruction

// Performance indicators
sse2slow // SSE2 is supported, but usually not faster
Expand Down Expand Up @@ -97,6 +99,7 @@ var flagNames = map[flags]string{
bmi2: "BMI2", // Bit Manipulation Instruction Set 2
tbm: "TBM", // AMD Trailing Bit Manipulation
lzcnt: "LZCNT", // LZCNT instruction
popcnt: "POPCNT", // POPCNT instruction
aesni: "AESNI", // Advanced Encryption Standard New Instructions
clmul: "CLMUL", // Carry-less Multiplication
htt: "HTT", // Hyperthreading (enabled)
Expand All @@ -118,6 +121,7 @@ var flagNames = map[flags]string{
mpx: "MPX", // Intel MPX (Memory Protection Extensions)
erms: "ERMS", // Enhanced REP MOVSB/STOSB
rdtscp: "RDTSCP", // RDTSCP Instruction
cx16: "CX16", // CMPXCHG16B Instruction

// Performance indicators
sse2slow: "SSE2SLOW", // SSE2 supported, but usually not faster
Expand Down Expand Up @@ -282,6 +286,11 @@ func (c cpuInfo) lzcnt() bool {
return c.features&lzcnt != 0
}

// popcnt indicates support of the POPCNT instruction.
// It reports whether the popcnt flag is set in c.features.
func (c cpuInfo) popcnt() bool {
return c.features&popcnt != 0
}

// HTT indicates the processor has Hyperthreading enabled
func (c cpuInfo) htt() bool {
return c.features&htt != 0
Expand Down Expand Up @@ -408,6 +417,10 @@ func (c cpuInfo) rdtscp() bool {
return c.features&rdtscp != 0
}

// cx16 indicates support of the CMPXCHG16B instruction.
// It reports whether the cx16 flag is set in c.features.
func (c cpuInfo) cx16() bool {
return c.features&cx16 != 0
}

// Atom indicates an Atom processor
func (c cpuInfo) atom() bool {
return c.features&atom != 0
Expand Down Expand Up @@ -663,12 +676,18 @@ func support() flags {
if (c & (1 << 1)) != 0 {
rval |= clmul
}
if c&(1<<23) != 0 {
rval |= popcnt
}
if c&(1<<30) != 0 {
rval |= rdrand
}
if c&(1<<29) != 0 {
rval |= f16c
}
if c&(1<<13) != 0 {
rval |= cx16
}
if (c & (1 << 28)) != 0 {
// This field does not indicate that Hyper-Threading
// Technology has been enabled for this specific processor.
Expand Down
20 changes: 20 additions & 0 deletions private/cpuid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,16 @@ func TestF16C(t *testing.T) {
t.Log("F16C Support:", got)
}

// TestCX16 tests the cx16() function.
// It checks that cpu.cx16() agrees with testing the cx16 bit of
// cpu.features directly, and logs the detected value.
func TestCX16(t *testing.T) {
got := cpu.cx16()
expected := cpu.features&cx16 == cx16
if got != expected {
t.Fatalf("CX16: expected %v, got %v", expected, got)
}
t.Log("CX16 Support:", got)
}

// TestBMI1 tests BMI1() function
func TestBMI1(t *testing.T) {
got := cpu.bmi1()
Expand Down Expand Up @@ -260,6 +270,16 @@ func TestLzcnt(t *testing.T) {
t.Log("LZCNT Support:", got)
}

// TestPopcnt tests the popcnt() function.
// It checks that cpu.popcnt() agrees with testing the popcnt bit of
// cpu.features directly, and logs the detected value.
func TestPopcnt(t *testing.T) {
got := cpu.popcnt()
expected := cpu.features&popcnt == popcnt
if got != expected {
t.Fatalf("Popcnt: expected %v, got %v", expected, got)
}
t.Log("POPCNT Support:", got)
}

// TestAesNi tests AesNi() function
func TestAesNi(t *testing.T) {
got := cpu.aesni()
Expand Down

0 comments on commit 9c472e6

Please sign in to comment.