Skip to content

Commit

Permalink
Implement some MMX instructions
Browse files Browse the repository at this point in the history
pmuludq, punpckldq, movd, psrlq, psllq, paddq, pand, pxor

Fixes libcrypto when the SSE bit is on in cpuid
  • Loading branch information
tbodt committed Jun 13, 2020
1 parent 7c91012 commit 220745b
Show file tree
Hide file tree
Showing 6 changed files with 136 additions and 22 deletions.
41 changes: 36 additions & 5 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0x70: TRACEI("pshufd xmm:modrm, xmm, imm8");
READMODRM; READIMM8; V_OP_IMM(shuffle_d, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x73: READMODRM;
case 0x73: READMODRM_NOMEM;
switch (modrm.opcode) {
case 0x02: TRACEI("psrlq imm, xmm");
READIMM8; V_OP(imm_shiftr_q, imm, xmm_modrm_reg, 128); break;
Expand Down Expand Up @@ -339,6 +339,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf3: TRACEI("psllq xmm:modrm, xmm");
READMODRM; V_OP(shiftl_q, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf4: TRACEI("pmuludq xmm:modrm, xmm");
READMODRM; V_OP(mulu_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfb: TRACEI("psubq xmm:modrm, xmm");
READMODRM; V_OP(sub_q, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfc: TRACEI("paddb xmm:modrm, xmm");
Expand All @@ -363,10 +365,39 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0x57: TRACEI("xorps xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x6f: TRACEI("movq modrm, mm");
READMODRM; VMOV(mm_modrm_val, mm_modrm_reg, 64); break;
case 0x7f: TRACEI("movq mm, modrm");
READMODRM; VMOV(mm_modrm_reg, mm_modrm_val, 64); break;
case 0x62: TRACEI("punpckldq mm:modrm, mm");
READMODRM; V_OP(unpack_dq, mm_modrm_val, mm_modrm_reg,64); break;

case 0x6e: TRACEI("movd modrm, mm");
READMODRM; VMOV(modrm_val, mm_modrm_reg,32); break;
case 0x6f: TRACEI("movq mm:modrm, mm");
READMODRM; VMOV(mm_modrm_val, mm_modrm_reg,64); break;

case 0x73: READMODRM;
switch (modrm.opcode) {
case 2: TRACEI("psrlq imm, mm");
READIMM8; V_OP(imm_shiftr_q, imm, mm_modrm_reg, 64); break;
case 6: TRACEI("psllq imm, mm");
READIMM8; V_OP(imm_shiftl_q, imm, mm_modrm_reg, 64); break;
default: UNDEFINED;
}
break;

case 0x7e: TRACEI("movd mm, modrm");
READMODRM; VMOV(mm_modrm_reg, modrm_val,32); break;
case 0x7f: TRACEI("movq mm, mm:modrm");
READMODRM_MEM; VMOV(mm_modrm_reg, mm_modrm_val,64); break;

case 0xd4: TRACEI("paddq mm:modrm, mm");
READMODRM; V_OP(add_q, mm_modrm_val, mm_modrm_reg,64); break;
case 0xdb: TRACEI("pand mm:modrm, mm");
READMODRM; V_OP(and, mm_modrm_val, mm_modrm_reg,64); break;

case 0xef: TRACEI("pxor mm:modrm, mm");
READMODRM; V_OP(xor, mm_modrm_val, mm_modrm_reg,64); break;

case 0xf4: TRACEI("pmuludq mm:modrm, mm");
READMODRM; V_OP(mulu_dq, mm_modrm_val, mm_modrm_reg,64); break;
#endif

default: TRACEI("undefined");
Expand Down
56 changes: 48 additions & 8 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ VEC_ZERO_COPY(128, 128)
VEC_ZERO_COPY(128, 64)
VEC_ZERO_COPY(128, 32)
VEC_ZERO_COPY(64, 64)
VEC_ZERO_COPY(64, 32)
VEC_ZERO_COPY(32, 32)

void vec_merge32(NO_CPU, const void *src, void *dst) {
Expand All @@ -38,6 +39,12 @@ void vec_imm_shiftl_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
dst->qw[1] <<= amount;
}
}
// Shift the 64-bit quadword of an MMX register left by an immediate count, in place.
// amount: shift count; any value above 63 clears the register instead of shifting.
// dst: register that is both read and overwritten with the result.
void vec_imm_shiftl_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    // The over-wide case is handled explicitly rather than handed to the << operator.
    dst->qw = (amount > 63) ? 0 : (dst->qw << amount);
}

void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
if (amount > 63) {
Expand All @@ -47,6 +54,12 @@ void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
dst->qw[1] >>= amount;
}
}
// Shift the 64-bit quadword of an MMX register right by an immediate count, in place.
// amount: shift count; any value above 63 clears the register instead of shifting.
// dst: register that is both read and overwritten with the result.
void vec_imm_shiftr_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
    // The over-wide case is handled explicitly rather than handed to the >> operator.
    dst->qw = (amount > 63) ? 0 : (dst->qw >> amount);
}

void vec_shiftl_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst) {
uint64_t amount_qw = amount->qw[0];
Expand Down Expand Up @@ -78,15 +91,33 @@ void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[0] += src->qw[0];
dst->qw[1] += src->qw[1];
}
// Add the quadword in src to the quadword in dst, leaving the sum in dst.
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
    dst->qw = dst->qw + src->qw;
}
// Subtract each quadword lane of src from the matching lane of dst, in place.
void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    // Two independent 64-bit lanes, processed low lane first.
    for (int lane = 0; lane < 2; lane++)
        dst->qw[lane] = dst->qw[lane] - src->qw[lane];
}

// Multiply the even-indexed 32-bit elements (u32[0] and u32[2]) of src and dst,
// storing each product, widened to 64 bits, in the corresponding quadword lane of dst.
void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    // Lane N reads element 2*N of both operands; lane 0 is stored before lane 1 is read.
    for (int lane = 0; lane < 2; lane++)
        dst->qw[lane] = (uint64_t) src->u32[2 * lane] * dst->u32[2 * lane];
}
// Multiply the low doublewords (dw[0]) of src and dst and store the product,
// widened to 64 bits, as the quadword of dst.
void vec_mulu_dq64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
    // Widen both operands before multiplying so the product is computed in 64 bits.
    const uint64_t multiplier = src->dw[0];
    const uint64_t multiplicand = dst->dw[0];
    dst->qw = multiplier * multiplicand;
}

// Bitwise AND of src into dst across both quadword lanes.
void vec_and128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (int lane = 0; lane < 2; lane++)
        dst->qw[lane] = dst->qw[lane] & src->qw[lane];
}
// Bitwise AND of the src quadword into the dst quadword.
void vec_and64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
    dst->qw = dst->qw & src->qw;
}
// AND-NOT across both quadword lanes: dst becomes (~dst) & src.
// Note that the complement is applied to the destination operand, not the source.
void vec_andn128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (int lane = 0; lane < 2; lane++)
        dst->qw[lane] = ~dst->qw[lane] & src->qw[lane];
}
void vec_or128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[0] |= src->qw[0];
dst->qw[1] |= src->qw[1];
Expand All @@ -95,9 +126,8 @@ void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[0] ^= src->qw[0];
dst->qw[1] ^= src->qw[1];
}
void vec_andn128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[0] = ~dst->qw[0] & src->qw[0];
dst->qw[1] = ~dst->qw[1] & src->qw[1];
// Bitwise XOR of the src quadword into the dst quadword.
void vec_xor64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
    dst->qw = dst->qw ^ src->qw;
}

void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
Expand All @@ -123,12 +153,12 @@ void vec_single_fcmp64(NO_CPU, const double *src, union xmm_reg *dst, uint8_t ty
}

// Scalar floating-point arithmetic helpers. Each one applies a single C operator
// in place: *dst = *dst <op> *src. The 64 suffix operates on double, 32 on float.
// NOTE(review): fmul64, fsub64 and fdiv64 each appear twice in this span — this
// looks like old and new diff lines interleaved; confirm only one definition of
// each is kept.
void vec_single_fadd64(NO_CPU, const double *src, double *dst) { *dst += *src; }
void vec_single_fmul64(NO_CPU, const double *src, double *dst) { *dst *= *src; }
void vec_single_fsub64(NO_CPU, const double *src, double *dst) { *dst -= *src; }
void vec_single_fdiv64(NO_CPU, const double *src, double *dst) { *dst /= *src; }
void vec_single_fadd32(NO_CPU, const float *src, float *dst) { *dst += *src; }
void vec_single_fmul64(NO_CPU, const double *src, double *dst) { *dst *= *src; }
void vec_single_fmul32(NO_CPU, const float *src, float *dst) { *dst *= *src; }
void vec_single_fsub64(NO_CPU, const double *src, double *dst) { *dst -= *src; }
void vec_single_fsub32(NO_CPU, const float *src, float *dst) { *dst -= *src; }
void vec_single_fdiv64(NO_CPU, const double *src, double *dst) { *dst /= *src; }
void vec_single_fdiv32(NO_CPU, const float *src, float *dst) { *dst /= *src; }

void vec_single_fmax64(NO_CPU, const double *src, double *dst) {
Expand Down Expand Up @@ -160,10 +190,17 @@ void vec_single_ucomi64(struct cpu_state *cpu, const double *src, const double *
cpu->sf_res = 0;
}

// come to the dark side of macros
// _ISNAN(x, t) dispatches on the spelled-out source type t, so that integer
// sources never reach isnan(); only float and double sources can be NaN.
#define _ISNAN_int32_t(x) false
#define _ISNAN_float(x) isnan(x)
#define _ISNAN_double(x) isnan(x)
#define _ISNAN(x, t) _ISNAN_##t(x)
// Nonzero when t is a floating-point type: 0.5 survives a cast to float or
// double but truncates to 0 when cast to any integer type.
#define CVT_IS_FLOAT_TYPE(t) ((t) 0.5 != 0)
// Value that a conversion of x (of type src_t) to dst_t stores:
//  - NaN source, integer destination: INT32_MIN, so the NaN is never cast to
//    an integer;
//  - NaN source, floating-point destination: the NaN is carried through the
//    ordinary conversion instead of being replaced by INT32_MIN;
//  - anything else: the plain C conversion of x to dst_t.
// x is expanded more than once, so pass an expression without side effects.
#define CVT_RESULT(x, src_t, dst_t) \
    ((_ISNAN(x, src_t) && !CVT_IS_FLOAT_TYPE(dst_t)) ? (dst_t) INT32_MIN : (dst_t) (x))
// VEC_CVT(name, src_t, dst_t) defines vec_cvt<name>(src, dst), which converts
// *src from src_t to dst_t and stores the result in *dst.
// TODO remaining float edge cases, e.g. out-of-range values converted to an integer
#define VEC_CVT(name, src_t, dst_t) \
    void vec_cvt##name(NO_CPU, const src_t *src, dst_t *dst) { \
        *dst = CVT_RESULT(*src, src_t, dst_t); \
    }
VEC_CVT(si2sd32, int32_t, double)
VEC_CVT(tsd2si64, double, int32_t)
Expand All @@ -183,6 +220,9 @@ void vec_unpack_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[2] = dst->u32[1];
dst->u32[1] = src->u32[0];
}
// Interleave the low doublewords of dst and src into dst: dst->dw[0] is left
// untouched and dst->dw[1] receives src->dw[0].
void vec_unpack_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    enum { LOW = 0, HIGH = 1 };
    dst->dw[HIGH] = src->dw[LOW];
}
// Interleave the low quadwords of dst and src into dst: dst->qw[0] is left
// untouched and dst->qw[1] receives src->qw[0].
void vec_unpack_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    enum { LOW = 0, HIGH = 1 };
    dst->qw[HIGH] = src->qw[LOW];
}
Expand Down
17 changes: 13 additions & 4 deletions emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,34 +11,42 @@ void vec_zero128_copy128(NO_CPU, const void *src, void *dst);
void vec_zero128_copy64(NO_CPU, const void *src, void *dst);
void vec_zero128_copy32(NO_CPU, const void *src, void *dst);
void vec_zero64_copy64(NO_CPU, const void *src, void *dst);
void vec_zero64_copy32(NO_CPU, const void *src, void *dst);
void vec_zero32_copy32(NO_CPU, const void *src, void *dst);
// "merge" means don't zero the register before writing to it
void vec_merge32(NO_CPU, const void *src, void *dst);
void vec_merge64(NO_CPU, const void *src, void *dst);
void vec_merge128(NO_CPU, const void *src, void *dst);

// Integer shifts and arithmetic. The trailing number is the register width in
// bits: 128 variants operate on union xmm_reg, 64 variants on union mm_reg.
// In every case dst is both an input and the place the result is written.
// imm_shift*: the shift count is passed by value.
void vec_imm_shiftl_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftl_q64(NO_CPU, const uint8_t amount, union mm_reg *dst);
void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftr_q64(NO_CPU, const uint8_t amount, union mm_reg *dst);
// shift*: the shift count is read from a register operand.
void vec_shiftl_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst);
void vec_shiftr_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst);
// Lane-wise add/subtract; the letter before the width names the lane size
// (presumably b = byte, q = quadword — the b variant's body is not in view).
void vec_add_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst);
void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
// 32-bit by 32-bit multiply producing 64-bit products.
void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mulu_dq64(NO_CPU, union mm_reg *src, union mm_reg *dst);

// Bitwise operations; each combines src into dst and leaves the result in dst.
// 128 variants operate on union xmm_reg, 64 variants on union mm_reg.
void vec_and128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_and64(NO_CPU, union mm_reg *src, union mm_reg *dst);
// andn complements dst (not src) before the AND: dst = ~dst & src.
void vec_andn128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_or128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_xor64(NO_CPU, union mm_reg *src, union mm_reg *dst);

void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);


// Scalar floating-point arithmetic on a single value: *dst = *dst <op> *src.
// The 64 suffix takes double operands, the 32 suffix takes float operands.
// NOTE(review): the fmul64, fsub64 and fdiv64 prototypes each appear twice in
// this span — this looks like old and new diff lines interleaved; confirm and
// keep one of each.
void vec_single_fadd64(NO_CPU, const double *src, double *dst);
void vec_single_fmul64(NO_CPU, const double *src, double *dst);
void vec_single_fsub64(NO_CPU, const double *src, double *dst);
void vec_single_fdiv64(NO_CPU, const double *src, double *dst);
void vec_single_fadd32(NO_CPU, const float *src, float *dst);
void vec_single_fmul64(NO_CPU, const double *src, double *dst);
void vec_single_fmul32(NO_CPU, const float *src, float *dst);
void vec_single_fsub64(NO_CPU, const double *src, double *dst);
void vec_single_fsub32(NO_CPU, const float *src, float *dst);
void vec_single_fdiv64(NO_CPU, const double *src, double *dst);
void vec_single_fdiv32(NO_CPU, const float *src, float *dst);

void vec_single_fmax64(NO_CPU, const double *src, double *dst);
Expand All @@ -57,6 +65,7 @@ void vec_cvtss2sd32(NO_CPU, const float *src, double *dst);
// TODO organize
void vec_unpack_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpack_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpack_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst);
void vec_unpack_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
Expand Down
2 changes: 1 addition & 1 deletion jit/gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -491,7 +491,7 @@ static inline bool gen_vec(enum arg src, enum arg dst, void (*helper)(), gadget_
g(vec_helper_imm);
GEN(helper);
// This is rm_opcode instead of opcode because PSRLQ is weird like that
GEN(((uint16_t) imm) | (CPU_OFFSET(xmm[modrm->rm_opcode]) << 16));
GEN(((uint16_t) imm) | (cpu_reg_offset(reg, modrm->rm_opcode) << 16));
break;

default: die("unimplemented vecarg");
Expand Down
26 changes: 26 additions & 0 deletions tests/e2e/qemu/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4360,16 +4360,26 @@ pcmpeqb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab
pcmpeqb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0000ffff0000ff0000000000ffffffff
pcmpeqd : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=00000000000000000000000000000000
pcmpeqd : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=000000000000000000000000ffffffff
paddq : a=456723c698694873 b=1f297ccd58bad7ab r=6490a093f124201e
paddq : a=007c62c2085427f8 b=0f76255a085427f8 r=0ff2881c10a84ff0
paddq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1e437bfb3e2e3a326490a093f124201e
paddq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54fd3d192b087270ff2881c10a84ff0
pminub : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41511efb944a58461f2923c658694873
pminub : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8c4c9438d0076255a085427f8
pand : a=456723c698694873 b=1f297ccd58bad7ab r=052120c418284023
pand : a=007c62c2085427f8 b=0f76255a085427f8 r=00742042085427f8
pand : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=40501cfb80424044052120c418284023
pand : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0213e9e8c4c1438800742042085427f8
por : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=ddf35effbdebf9ee5f6f7fcfd8fbdffb
por : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e33be9e8cdef439f0f7e67da085427f8
pxor : a=456723c698694873 b=1f297ccd58bad7ab r=5a4e5f0bc0d39fd8
pxor : a=007c62c2085427f8 b=0f76255a085427f8 r=0f0a479800000000
pxor : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9da342043da9b9aa5a4e5f0bc0d39fd8
pxor : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e1280000092e00170f0a479800000000
pmuludq : a=456723c698694873 b=1f297ccd58bad7ab r=34d36dcc65b9f9d1
pmuludq : a=007c62c2085427f8 b=0f76255a085427f8 r=00455e29c0fd8040
pmuludq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=6269151e89bfbc8834d36dcc65b9f9d1
pmuludq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=9e46f0ab618189d200455e29c0fd8040
psubq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e03ea6677a6263da6f93fae70c8
psubq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=60e80000091dfff3f1063d6800000000
paddb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1d437afa3d2d393264909f93f0231f1e
Expand Down Expand Up @@ -4397,29 +4407,45 @@ pshufd : a=231be9e8cde7438d007c62c2085427f8 ib=78 r=007c62c2231be9e8cde7438d08
pshuflw : a=dc515cff944a58ec456723c698694873 ib=78 r=dc515cff944a58ec9869456723c64873
pshuflw : a=231be9e8cde7438d007c62c2085427f8 ib=78 r=231be9e8cde7438d0854007c62c227f8
psrlq : a=dc515cff944a58ec456723c698694873 ib=07 r=01b8a2b9ff2894b1008ace478d30d290
psrlq : a=456723c698694873 ib=07 r=008ace478d30d290
psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=004637d3d19bce870000f8c58410a84f
psrlq : a=007c62c2085427f8 ib=07 r=0000f8c58410a84f
psrlq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000007 r=01b8a2b9ff2894b1008ace478d30d290
psrlq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000007 r=004637d3d19bce870000f8c58410a84f
psrlq : a=dc515cff944a58ec456723c698694873 ib=20 r=00000000dc515cff00000000456723c6
psrlq : a=456723c698694873 ib=20 r=00000000456723c6
psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=20 r=00000000231be9e800000000007c62c2
psrlq : a=007c62c2085427f8 ib=20 r=00000000007c62c2
psrlq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000020 r=00000000dc515cff00000000456723c6
psrlq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000020 r=00000000231be9e800000000007c62c2
psllq : a=dc515cff944a58ec456723c698694873 ib=07 r=28ae7fca252c7600b391e34c34a43980
psllq : a=456723c698694873 ib=07 r=b391e34c34a43980
psllq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=8df4f466f3a1c6803e3161042a13fc00
psllq : a=007c62c2085427f8 ib=07 r=3e3161042a13fc00
psllq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000007 r=28ae7fca252c7600b391e34c34a43980
psllq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000007 r=8df4f466f3a1c6803e3161042a13fc00
psllq : a=dc515cff944a58ec456723c698694873 ib=20 r=944a58ec000000009869487300000000
psllq : a=456723c698694873 ib=20 r=9869487300000000
psllq : a=231be9e8cde7438d007c62c2085427f8 ib=20 r=cde7438d00000000085427f800000000
psllq : a=007c62c2085427f8 ib=20 r=085427f800000000
psllq : a=dc515cff944a58ec456723c698694873 b=00000000000000000000000000000020 r=944a58ec000000009869487300000000
psllq : a=231be9e8cde7438d007c62c2085427f8 b=00000000000000000000000000000020 r=cde7438d00000000085427f800000000
psrlq : a=dc515cff944a58ec456723c698694873 ib=10 r=0000dc515cff944a0000456723c69869
psrlq : a=456723c698694873 ib=10 r=0000456723c69869
psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=10 r=0000231be9e8cde70000007c62c20854
psrlq : a=007c62c2085427f8 ib=10 r=0000007c62c20854
psrlq : a=dc515cff944a58ec456723c698694873 ib=07 r=01b8a2b9ff2894b1008ace478d30d290
psrlq : a=456723c698694873 ib=07 r=008ace478d30d290
psrlq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=004637d3d19bce870000f8c58410a84f
psrlq : a=007c62c2085427f8 ib=07 r=0000f8c58410a84f
psllq : a=dc515cff944a58ec456723c698694873 ib=10 r=5cff944a58ec000023c6986948730000
psllq : a=456723c698694873 ib=10 r=23c6986948730000
psllq : a=231be9e8cde7438d007c62c2085427f8 ib=10 r=e9e8cde7438d000062c2085427f80000
psllq : a=007c62c2085427f8 ib=10 r=62c2085427f80000
psllq : a=dc515cff944a58ec456723c698694873 ib=07 r=28ae7fca252c7600b391e34c34a43980
psllq : a=456723c698694873 ib=07 r=b391e34c34a43980
psllq : a=231be9e8cde7438d007c62c2085427f8 ib=07 r=8df4f466f3a1c6803e3161042a13fc00
psllq : a=007c62c2085427f8 ib=07 r=3e3161042a13fc00
movmskpd : a=dc515cff944a58ec456723c698694873 r=00000002
movmskpd : a=231be9e8cde7438d007c62c2085427f8 r=00000000
ucomiss : a=2.000000 b=-1.000000 cc=0000
Expand Down
16 changes: 12 additions & 4 deletions tests/e2e/qemu/qemu-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -2205,6 +2205,13 @@ static uint64_t __attribute__((aligned(16))) test_values[4][2] = {
a.q[1], a.q[0],\
ib,\
r.q[1], r.q[0]);\
a.q[0] = test_values[2*i][0];\
asm volatile (#op " $" #ib ", %0" : "=y" (r.q[0]) : "0" (a.q[0]));\
printf("%-9s: a=" FMT64X " ib=%02x r=" FMT64X "\n",\
#op,\
a.q[0],\
ib,\
r.q[0]);\
}\
}

Expand Down Expand Up @@ -2419,12 +2426,12 @@ void test_sse(void)
// MMX_OP2(pcmpeqw);
SSE_OP2(pcmpeqd);

SSE_OP2(paddq);
MMX_OP2(paddq);
// MMX_OP2(pmullw);
// MMX_OP2(psubusb);
// MMX_OP2(psubusw);
SSE_OP2(pminub);
SSE_OP2(pand);
MMX_OP2(pand);
// MMX_OP2(paddusb);
// MMX_OP2(paddusw);
// MMX_OP2(pmaxub);
Expand All @@ -2440,8 +2447,8 @@ void test_sse(void)
// MMX_OP2(paddsb);
// MMX_OP2(paddsw);
// MMX_OP2(pmaxsw);
SSE_OP2(pxor);
// MMX_OP2(pmuludq);
MMX_OP2(pxor);
MMX_OP2(pmuludq);
// MMX_OP2(pmaddwd);
// MMX_OP2(psadbw);
// MMX_OP2(psubb);
Expand Down Expand Up @@ -2558,6 +2565,7 @@ void test_sse(void)
MOVMSK(movmskpd);

/* FPU specific ops */
asm volatile ("emms");

// {
// uint32_t mxcsr;
Expand Down

0 comments on commit 220745b

Please sign in to comment.