From 5302e34f2331c9eddc0ab1cb8d568a5853a59f8a Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Sun, 2 Apr 2017 18:23:12 -0400 Subject: [PATCH] ISA: 3dnow! prefetch -> PREFETCH[W] extensions --- peachpy/x86_64/isa.py | 16 +++++++++----- peachpy/x86_64/uarch.py | 49 ++++++++++++++++++++++------------------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/peachpy/x86_64/isa.py b/peachpy/x86_64/isa.py index dd284c2..0d75e1c 100644 --- a/peachpy/x86_64/isa.py +++ b/peachpy/x86_64/isa.py @@ -34,9 +34,8 @@ def prerequisites(self): "CPUID": (cpuid,), "MMX": (mmx,), "MMX+": (mmx, mmx_plus), - "3dnow!": (mmx, three_d_now, three_d_now_prefetch), - "3dnow!+": (mmx, three_d_now, three_d_now_prefetch, three_d_now_plus), - "3dnow! Prefetch": (three_d_now_prefetch,), + "3dnow!": (mmx, three_d_now, prefetch, prefetchw), + "3dnow!+": (mmx, three_d_now, three_d_now_plus, prefetch, prefetchw), "FEMMS": (mmx, femms), "SSE": (mmx, mmx_plus, sse), "SSE2": (mmx, mmx_plus, sse, sse2), @@ -73,6 +72,9 @@ def prerequisites(self): avx512f, avx512vbmi), "AVX512IFMA": (mmx, mmx_plus, sse, sse2, sse3, ssse3, sse4_1, sse4_2, sse4_2, avx, f16c, fma3, avx2, avx512f, avx512ifma), + "PREFETCH": (prefetch,), + "PREFETCHW": (prefetchw,), + "PREFETCHWT1": (prefetchwt1,), "CMOV": (cmov,), "POPCNT": (popcnt,), "LZCNT": (lzcnt,), @@ -93,7 +95,6 @@ def ancestors(self): "MMX+": (mmx, mmx_plus), "3dnow!": (mmx, three_d_now), "3dnow!+": (mmx, three_d_now, three_d_now_plus), - "3dnow! Prefetch": (three_d_now_prefetch,), "FEMMS": (femms,), "SSE": (sse,), "SSE2": (sse, sse2), @@ -122,6 +123,9 @@ def ancestors(self): "AVX512CD": (avx, fma3, f16c, avx2, avx512f, avx512cd), "AVX512VBMI": (avx, fma3, f16c, avx2, avx512f, avx512vbmi), "AVX512IFMA": (avx, fma3, f16c, avx2, avx512f, avx512ifma), + "PREFETCH": (prefetch,), + "PREFETCHW": (prefetchw,), + "PREFETCHWT1": (prefetchwt1,), "CMOV": (cmov,), "POPCNT": (popcnt,), "LZCNT": (lzcnt,), @@ -148,7 +152,6 @@ def __repr__(self): mmx_plus = Extension("MMX+", safe_name="MMXPlus") three_d_now = Extension("3dnow!", safe_name="3dnow") three_d_now_plus = Extension("3dnow!+", safe_name="3dnowPlus") -three_d_now_prefetch = Extension("3dnow! Prefetch", safe_name="3dnowPrefetch") femms = Extension("FEMMS") sse = Extension("SSE") sse2 = Extension("SSE2") @@ -173,6 +176,9 @@ def __repr__(self): avx512vl = Extension("AVX512VL") avx512ifma = Extension("AVX512IFMA") avx512vbmi = Extension("AVX512VBMI") +prefetch = Extension("PREFETCH") +prefetchw = Extension("PREFETCHW") +prefetchwt1 = Extension("PREFETCHWT1") xop = Extension("XOP") f16c = Extension("F16C") fma3 = Extension("FMA3") diff --git a/peachpy/x86_64/uarch.py b/peachpy/x86_64/uarch.py index 2d64c2d..eb213b0 100644 --- a/peachpy/x86_64/uarch.py +++ b/peachpy/x86_64/uarch.py @@ -99,54 +99,56 @@ def __repr__(self): ivy_bridge = Microarchitecture('Ivy Bridge', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.popcnt, isa.avx, isa.f16c), alu_width=128, fpu_width=256, load_with=256, store_width=128) haswell = Microarchitecture('Haswell', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.popcnt, isa.avx, isa.f16c, isa.fma3, - isa.avx2, isa.lzcnt, isa.three_d_now_prefetch, isa.movbe, isa.bmi2), + isa.avx2, isa.lzcnt, isa.prefetchw, isa.movbe, isa.bmi2), alu_width=256, fpu_width=256, load_with=256, store_width=256) broadwell = Microarchitecture('Broadwell', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.popcnt, isa.f16c, isa.fma3, isa.avx2, - isa.lzcnt, isa.three_d_now_prefetch, isa.movbe, isa.bmi2, isa.adx), + isa.lzcnt, isa.prefetchw, isa.movbe, isa.bmi2, isa.adx), alu_width=256, fpu_width=256, load_with=256, store_width=256) skylake = Microarchitecture('Skylake', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.popcnt, isa.f16c, isa.fma3, isa.avx2, - isa.lzcnt, isa.three_d_now_prefetch, isa.movbe, isa.bmi2, isa.adx), + isa.lzcnt, isa.prefetchw, isa.movbe, isa.bmi2, isa.adx), alu_width=256, fpu_width=256, load_with=256, store_width=256) skylake_xeon = Microarchitecture('Skylake Xeon', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.popcnt, isa.f16c, isa.fma3, - isa.lzcnt, isa.three_d_now_prefetch, isa.movbe, isa.bmi2, isa.adx, + isa.lzcnt, isa.prefetchw, isa.movbe, isa.bmi2, isa.adx, isa.avx512bw, isa.avx512dq, isa.avx512vl, isa.avx512cd), # TODO: update EU width when SKX is out alu_width=512, fpu_width=512, load_with=512, store_width=512) cannonlake = Microarchitecture('Cannonlake', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.popcnt, isa.f16c, isa.fma3, - isa.lzcnt, isa.three_d_now_prefetch, isa.movbe, isa.bmi2, isa.adx, + isa.lzcnt, isa.prefetchw, isa.movbe, isa.bmi2, isa.adx, isa.avx512bw, isa.avx512dq, isa.avx512vl, isa.avx512cd, isa.avx512ifma, isa.avx512vbmi), # TODO: update EU width when CNL is out alu_width=512, fpu_width=512, load_with=512, store_width=512) knights_landing = Microarchitecture('Knights Landing', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.popcnt, isa.f16c, isa.fma3, - isa.lzcnt, isa.three_d_now_prefetch, isa.movbe, isa.bmi2, isa.adx, + isa.lzcnt, isa.prefetchw, isa.movbe, isa.bmi2, isa.adx, isa.avx512cd, isa.avx512cd, isa.avx512er), # TODO: update EU width when KNL is out alu_width=512, fpu_width=512, load_with=512, store_width=512) -k8 = Microarchitecture('K8', (isa.cmov, isa.mmx_plus, isa.three_d_now_plus, isa.three_d_now_prefetch, isa.sse2), +k8 = Microarchitecture('K8', (isa.cmov, isa.mmx_plus, isa.three_d_now_plus, isa.sse2, + isa.prefetch, isa.prefetchw), alu_width=64, fpu_width=64, load_with=64, store_width=64) -k10 = Microarchitecture('K10', (isa.cmov, isa.mmx_plus, isa.three_d_now_plus, isa.three_d_now_prefetch, isa.sse4a, - isa.popcnt, isa.lzcnt), +k10 = Microarchitecture('K10', (isa.cmov, isa.mmx_plus, isa.three_d_now_plus, isa.sse4a, + isa.prefetch, isa.prefetchw, isa.popcnt, isa.lzcnt), alu_width=128, fpu_width=128, load_with=128, store_width=64) bulldozer = Microarchitecture('Bulldozer', (isa.cmov, isa.mmx_plus, isa.sse4a, isa.avx, isa.xop, isa.fma4, - isa.three_d_now_prefetch, isa.aes, isa.pclmulqdq, isa.lzcnt, isa.popcnt), + isa.prefetch, isa.prefetchw, isa.aes, isa.pclmulqdq, isa.lzcnt, isa.popcnt), alu_width=128, fpu_width=128, load_with=128, store_width=128) -piledriver = Microarchitecture('Piledriver', (isa.cmov, isa.mmx_plus, isa.sse4a, isa.sse4_2, isa.avx, isa.xop, isa.fma4, - isa.fma3, isa.f16c, isa.three_d_now_prefetch, isa.aes, isa.pclmulqdq, - isa.lzcnt, isa.popcnt, isa.bmi, isa.tbm), +piledriver = Microarchitecture('Piledriver', (isa.cmov, isa.mmx_plus, isa.sse4a, isa.sse4_2, + isa.avx, isa.xop, isa.fma4, isa.fma3, isa.f16c, isa.aes, isa.pclmulqdq, + isa.prefetch, isa.prefetchw, isa.lzcnt, isa.popcnt, isa.bmi, isa.tbm), alu_width=128, fpu_width=128, load_with=128, store_width=128) -steamroller = Microarchitecture('Steamroller', (isa.cmov, isa.mmx_plus, isa.sse4a, isa.avx, isa.xop, isa.fma4, isa.fma3, - isa.f16c, isa.three_d_now_prefetch, isa.aes, isa.pclmulqdq, isa.lzcnt, - isa.popcnt, isa.bmi, isa.tbm), +steamroller = Microarchitecture('Steamroller', (isa.cmov, isa.mmx_plus, isa.sse4a, isa.sse4_2, + isa.avx, isa.xop, isa.fma4, isa.fma3, isa.f16c, isa.aes, isa.pclmulqdq, + isa.prefetch, isa.prefetchw, isa.lzcnt, isa.popcnt, isa.bmi, isa.tbm), alu_width=128, fpu_width=256, load_with=256, store_width=128) excavator = Microarchitecture('Excavator', (isa.cmov, isa.mmx_plus, isa.sse4a, isa.xop, isa.fma4, isa.fma3, isa.f16c, - isa.avx2, isa.three_d_now_prefetch, isa.aes, isa.pclmulqdq, isa.rdrand, + isa.avx2, isa.aes, isa.pclmulqdq, isa.rdrand, isa.prefetch, isa.prefetchw, isa.lzcnt, isa.popcnt, isa.bmi2, isa.tbm), alu_width=256, fpu_width=256, load_with=256, store_width=128) # TODO: update info when Zen is out zen = Microarchitecture('Zen', (isa.cmov, isa.mmx_plus, isa.fma4, isa.fma3, isa.f16c, isa.avx2, isa.aes, isa.pclmulqdq, isa.rdseed, isa.sha, + isa.prefetch, isa.prefetchw, isa.lzcnt, isa.popcnt, isa.bmi2, isa.adx), alu_width=256, fpu_width=256, load_with=256, store_width=256) bonnell = Microarchitecture('Bonnell', (isa.cmov, isa.movbe, isa.mmx_plus, isa.ssse3), @@ -159,16 +161,17 @@ def __repr__(self): airmont = Microarchitecture('Airmont', (isa.cmov, isa.movbe, isa.popcnt, isa.mmx_plus, isa.sse4_2, isa.aes, isa.pclmulqdq, isa.rdrand, - isa.three_d_now_prefetch, isa.rdtscp), + isa.prefetchw, isa.rdtscp), alu_width=128, fpu_width=64, load_with=128, store_width=128) -# TODO: update info when GLM is out goldmont = Microarchitecture('Goldmont', (isa.cmov, isa.movbe, isa.popcnt, isa.adx, - isa.mmx_plus, isa.sse4_2, isa.three_d_now_prefetch, + isa.mmx_plus, isa.sse4_2, isa.prefetchw, isa.aes, isa.pclmulqdq, isa.rdseed, isa.sha, isa.rdtscp), alu_width=128, fpu_width=64, load_with=128, store_width=128) -bobcat = Microarchitecture('Bobcat', (isa.cmov, isa.mmx_plus, isa.three_d_now_prefetch, isa.ssse3, isa.sse4a), +bobcat = Microarchitecture('Bobcat', (isa.cmov, isa.mmx_plus, isa.ssse3, isa.sse4a, + isa.prefetch, isa.prefetchw, isa.lzcnt, isa.popcnt), alu_width=64, fpu_width=64, load_with=64, store_width=64) -jaguar = Microarchitecture('Jaguar', (isa.cmov, isa.movbe, isa.lzcnt, isa.bmi, isa.popcnt, isa.three_d_now_prefetch, - isa.mmx_plus, isa.sse4_2, isa.sse4a, isa.avx, isa.f16c, isa.aes, isa.pclmulqdq), +jaguar = Microarchitecture('Jaguar', (isa.cmov, isa.mmx_plus, isa.sse4_2, isa.sse4a, isa.avx, isa.f16c, + isa.prefetch, isa.prefetchw, isa.lzcnt, isa.popcnt, isa.movbe, + isa.aes, isa.pclmulqdq), alu_width=128, fpu_width=128, load_with=128, store_width=128)