Skip to content

Commit

Permalink
Implement FCMOVcc, cmpss, pminsw, and pmaxsw
Browse files Browse the repository at this point in the history
  • Loading branch information
jason-conway committed Sep 26, 2022
1 parent 135aa54 commit c48d211
Show file tree
Hide file tree
Showing 8 changed files with 136 additions and 19 deletions.
16 changes: 15 additions & 1 deletion emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,12 +426,16 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(subss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xe9: TRACEI("psubsw xmm:modrm, xmm");
READMODRM; V_OP(subss_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xea: TRACEI("pminsw xmm:modrm, xmm");
READMODRM; V_OP(mins_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xeb: TRACEI("por xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xec: TRACEI("paddsb xmm:modrm, xmm");
READMODRM; V_OP(addss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xed: TRACEI("paddsw xmm:modrm, xmm");
READMODRM; V_OP(addss_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xee: TRACEI("pmaxsw xmm:modrm, xmm");
READMODRM; V_OP(maxs_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xef: TRACEI("pxor xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf3: TRACEI("psllq xmm:modrm, xmm");
Expand Down Expand Up @@ -927,6 +931,14 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0xd87: TRACE("fdivr st(i), st"); FDIVR(st_i, st_0); break;
case 0xd90: TRACE("fld st(i)"); FLD(); break;
case 0xd91: TRACE("fxch st"); FXCH(); break;
case 0xda0: TRACE("fcmovb st, st(i)"); FCMOVB(st_i); break;
case 0xda1: TRACE("fcmove st, st(i)"); FCMOVE(st_i); break;
case 0xda2: TRACE("fcmovbe st, st(i)"); FCMOVBE(st_i); break;
case 0xda3: TRACE("fcmovu st, st(i)"); FCMOVU(st_i); break;
case 0xdb0: TRACE("fcmovnb st, st(i)"); FCMOVNB(st_i); break;
case 0xdb1: TRACE("fcmovne st, st(i)"); FCMOVNE(st_i); break;
case 0xdb2: TRACE("fcmovnbe st, st(i)"); FCMOVNBE(st_i); break;
case 0xdb3: TRACE("fcmovnu st, st(i)"); FCMOVNU(st_i); break;
case 0xdb5: TRACE("fucomi st"); FUCOMI(); break;
case 0xdb6: TRACE("fcomi st"); FCOMI(); break;
case 0xdc0: TRACE("fadd st, st(i)"); FADD(st_0, st_i); break;
Expand Down Expand Up @@ -1187,7 +1199,6 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(single_fsub, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5e: TRACEI("divss xmm:modrm, xmm");
READMODRM; V_OP(single_fdiv, xmm_modrm_val, xmm_modrm_reg,32); break;

case 0x6f: TRACEI("movdqu xmm:modrm, xmm");
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;

Expand All @@ -1209,6 +1220,9 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0xbd: TRACEI("~~lzcnt~~ bsr modrm, reg");
READMODRM; BSR(modrm_val, modrm_reg,oz); break;

case 0xc2: TRACEI("cmpss xmm:modrm, xmm, imm8");
READMODRM; READIMM8; V_OP_IMM(single_fcmp, xmm_modrm_val, xmm_modrm_reg,32); break;

default: TRACE("undefined"); UNDEFINED;
}
break;
Expand Down
17 changes: 16 additions & 1 deletion emu/fpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,6 @@ void fpu_ldm80(struct cpu_state *cpu, float80 *f) {
// Register-to-register store: copies the value of ST(0) into ST(i).
// ST(0) itself is left as it was (no pop is performed here).
void fpu_st(struct cpu_state *cpu, int i) {
ST(i) = ST(0);
}

void fpu_ist16(struct cpu_state *cpu, int16_t *i) {
int64_t res = f80_to_int(ST(0));
if (res < INT16_MIN || res > INT16_MAX)
Expand Down Expand Up @@ -92,6 +91,22 @@ void fpu_stm80(struct cpu_state *cpu, float80 *f) {
memcpy(f, &ST(0), 10);
}

// moves

// Generator for the conditional-move helpers.
// FCMOVcc(suffix, test) defines fpu_cmov<suffix>(cpu, i), which copies
// ST(i) into ST(0) when `test` is non-zero and does nothing otherwise.
// NOTE(review): the tests read cpu->cf / cpu->zf / cpu->pf directly; confirm
// those fields hold current flag values wherever these helpers are called.
#define FCMOVcc(suffix, test) \
    void fpu_cmov##suffix(struct cpu_state *cpu, int i) { \
        if (!(test)) \
            return; \
        ST(0) = ST(i); \
    }
FCMOVcc(b, cpu->cf)
FCMOVcc(e, cpu->zf)
FCMOVcc(be, cpu->cf || cpu->zf)
FCMOVcc(u, cpu->pf)
FCMOVcc(nb, !cpu->cf)
FCMOVcc(ne, !cpu->zf)
FCMOVcc(nbe, !cpu->cf && !cpu->zf)
FCMOVcc(nu, !cpu->pf)

// math

void fpu_prem(struct cpu_state *cpu) {
Expand Down
9 changes: 9 additions & 0 deletions emu/fpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,15 @@ void fpu_stm32(struct cpu_state *cpu, float *f);
void fpu_stm64(struct cpu_state *cpu, double *f);
void fpu_stm80(struct cpu_state *cpu, float80 *f);

// Conditional moves: each helper copies ST(i) into ST(0) when its flag
// condition holds and otherwise does nothing.
// Conditions: b = cf, e = zf, be = cf or zf, u = pf; the n* variants use the
// negated condition.
void fpu_cmovb(struct cpu_state *cpu, int i);
void fpu_cmove(struct cpu_state *cpu, int i);
void fpu_cmovbe(struct cpu_state *cpu, int i);
void fpu_cmovu(struct cpu_state *cpu, int i);
void fpu_cmovnb(struct cpu_state *cpu, int i);
void fpu_cmovne(struct cpu_state *cpu, int i);
void fpu_cmovnbe(struct cpu_state *cpu, int i);
void fpu_cmovnu(struct cpu_state *cpu, int i);

void fpu_ld(struct cpu_state *cpu, int i);
void fpu_ldc(struct cpu_state *cpu, enum fpu_const c);
void fpu_ild16(struct cpu_state *cpu, int16_t *i);
Expand Down
31 changes: 26 additions & 5 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ static inline uint32_t satub(uint32_t dw) {
dw = 0xff;
return dw;
}
static inline uint32_t satsb(uint32_t dw)
{
static inline uint32_t satsb(uint32_t dw) {
if (dw > 0xffffff80)
dw &= 0xff;
else if (dw > 0x7fffffff)
Expand Down Expand Up @@ -327,12 +326,20 @@ void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
if (src->u8[i] < dst->u8[i])
dst->u8[i] = src->u8[i];
}

// Lane-wise maximum over the u8 lanes: every lane of dst that is smaller
// than the matching lane of src is overwritten with the src value.
void vec_max_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < array_size(src->u8); lane++) {
        if (dst->u8[lane] < src->u8[lane])
            dst->u8[lane] = src->u8[lane];
    }
}
// Signed lane-wise minimum over the 8 u16 lanes: the lanes are compared as
// int16_t and dst ends up holding the smaller value of each pair.
void vec_mins_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 8; lane++) {
        // On a tie both lanes hold the same bits, so copying src is harmless.
        if ((int16_t)src->u16[lane] <= (int16_t)dst->u16[lane])
            dst->u16[lane] = src->u16[lane];
    }
}

// Signed lane-wise maximum over the 8 u16 lanes: the lanes are compared as
// int16_t and dst ends up holding the larger value of each pair.
void vec_maxs_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 8; lane++) {
        // On a tie both lanes hold the same bits, so copying src is harmless.
        if ((int16_t)src->u16[lane] >= (int16_t)dst->u16[lane])
            dst->u16[lane] = src->u16[lane];
    }
}

static bool cmpd(double a, double b, int type) {
bool res;
Expand All @@ -345,10 +352,24 @@ static bool cmpd(double a, double b, int type) {
if (type >= 4) res = !res;
return res;
}
// Evaluate a single-precision compare predicate for `a` against `b`.
// The low two bits of `type` pick the base relation:
//   0: a == b    1: a < b    2: a <= b    3: unordered (a or b is NaN)
// Any `type` >= 4 returns the logical negation of that base relation.
static bool cmps(float a, float b, int type) {
    // Start from a defined value so no path can return an indeterminate bool.
    bool res = false;
    // `type & 3` equals `type % 4` for every non-negative type, but unlike
    // `%` it never produces a negative selector that would miss every case.
    switch (type & 3) {
        case 0: res = a == b; break;
        case 1: res = a < b; break;
        case 2: res = a <= b; break;
        case 3: res = isnan(a) || isnan(b); break;
    }
    if (type >= 4)
        res = !res;
    return res;
}

// Scalar double compare: evaluates predicate `type` on dst->f64[0] versus
// *src via cmpd() and writes the outcome to dst->qw[0] as a mask
// (-1 when the predicate holds, 0 when it does not).
void vec_single_fcmp64(NO_CPU, const double *src, union xmm_reg *dst, uint8_t type) {
    if (cmpd(dst->f64[0], *src, type))
        dst->qw[0] = -1;
    else
        dst->qw[0] = 0;
}
// Scalar float compare: evaluates predicate `type` on dst->f32[0] versus
// *src via cmps() and writes the outcome to dst->u32[0] as a mask
// (-1 when the predicate holds, 0 when it does not).
void vec_single_fcmp32(NO_CPU, const float *src, union xmm_reg *dst, uint8_t type) {
    if (cmps(dst->f32[0], *src, type))
        dst->u32[0] = -1;
    else
        dst->u32[0] = 0;
}

// Scalar double add: *dst receives *dst + *src.
void vec_single_fadd64(NO_CPU, const double *src, double *dst) {
    *dst = *dst + *src;
}
// Scalar float add: *dst receives *dst + *src.
void vec_single_fadd32(NO_CPU, const float *src, float *dst) {
    *dst = *dst + *src;
}
Expand Down Expand Up @@ -606,11 +627,11 @@ void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t i
}

// Rounding average over the 16 u8 lanes: each lane of dst becomes
// (dst + src + 1) >> 1, i.e. the mean of the two lanes rounded half up.
void vec_avg_b128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Exactly one pass over the lanes. A second, stacked loop header would
    // shadow `i` and re-average lanes that were already updated.
    for (unsigned i = 0; i < 16; i++)
        dst->u8[i] = (1 + dst->u8[i] + src->u8[i]) >> 1;
}
// Rounding average over the 8 u16 lanes: each lane of dst becomes
// (dst + src + 1) >> 1, i.e. the mean of the two lanes rounded half up.
void vec_avg_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Exactly one pass over the lanes. A second, stacked loop header would
    // shadow `i` and re-average lanes that were already updated.
    for (unsigned i = 0; i < 8; i++)
        dst->u16[i] = (1 + dst->u16[i] + src->u16[i]) >> 1;
}

Expand Down
4 changes: 3 additions & 1 deletion emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ void vec_xor128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_xor64(NO_CPU, union mm_reg *src, union mm_reg *dst);

// Lane-wise min/max; the result is written into dst.
// The *_ub128 forms compare u8 lanes; the *s_w128 forms compare the u16
// lanes as int16_t (signed words).
void vec_min_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mins_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_max_ub128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_maxs_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);

void vec_single_fadd64(NO_CPU, const double *src, double *dst);
void vec_single_fadd32(NO_CPU, const float *src, float *dst);
Expand All @@ -91,7 +93,7 @@ void vec_single_fmin64(NO_CPU, const double *src, double *dst);
void vec_single_ucomi32(struct cpu_state *cpu, const float *src, const float *dst);
void vec_single_ucomi64(struct cpu_state *cpu, const double *src, const double *dst);
void vec_single_fcmp64(NO_CPU, const double *src, union xmm_reg *dst, uint8_t type);

void vec_single_fcmp32(NO_CPU, const float *src, union xmm_reg *dst, uint8_t type);
void vec_fcmp_p64(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t type);

void vec_cvtsi2sd32(NO_CPU, const int32_t *src, double *dst);
Expand Down
8 changes: 8 additions & 0 deletions jit/gen.c
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,14 @@ void helper_rdtsc(struct cpu_state *cpu);
#define FSIN() h(fpu_sin)
#define FCOS() h(fpu_cos)
#define FXTRACT() h(fpu_xtract)
// FCMOVcc: each macro hands its operand to the matching fpu_cmov* helper
// through hh().
// NOTE(review): hh() is defined elsewhere; presumably it emits a helper call
// that takes one extra argument — confirm against its definition.
#define FCMOVB(src) hh(fpu_cmovb, src)
#define FCMOVE(src) hh(fpu_cmove, src)
#define FCMOVBE(src) hh(fpu_cmovbe, src)
#define FCMOVU(src) hh(fpu_cmovu, src)
#define FCMOVNB(src) hh(fpu_cmovnb, src)
#define FCMOVNE(src) hh(fpu_cmovne, src)
#define FCMOVNBE(src) hh(fpu_cmovnbe, src)
#define FCMOVNU(src) hh(fpu_cmovnu, src)

// vector

Expand Down
48 changes: 48 additions & 0 deletions tests/e2e/qemu/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4206,6 +4206,34 @@ fldpi= 3.141593
fldlg2= 0.301030
fldln2= 0.693147
fldz= 0.000000
fcmovb eflags=0x0000-> 1.000000
fcmove eflags=0x0000-> 1.000000
fcmovbe eflags=0x0000-> 1.000000
fcmovnb eflags=0x0000-> 2.000000
fcmovne eflags=0x0000-> 2.000000
fcmovnbe eflags=0x0000-> 2.000000
fcmovb eflags=0x0001-> 2.000000
fcmove eflags=0x0001-> 1.000000
fcmovbe eflags=0x0001-> 2.000000
fcmovnb eflags=0x0001-> 1.000000
fcmovne eflags=0x0001-> 2.000000
fcmovnbe eflags=0x0001-> 1.000000
fcmovb eflags=0x0040-> 1.000000
fcmove eflags=0x0040-> 2.000000
fcmovbe eflags=0x0040-> 2.000000
fcmovnb eflags=0x0040-> 2.000000
fcmovne eflags=0x0040-> 1.000000
fcmovnbe eflags=0x0040-> 1.000000
fcmovb eflags=0x0041-> 2.000000
fcmove eflags=0x0041-> 2.000000
fcmovbe eflags=0x0041-> 2.000000
fcmovnb eflags=0x0041-> 1.000000
fcmovne eflags=0x0041-> 1.000000
fcmovnbe eflags=0x0041-> 1.000000
fcmovu eflags=0x0000-> 1.000000
fcmovu eflags=0x0004-> 2.000000
fcmovnu eflags=0x0000-> 2.000000
fcmovnu eflags=0x0004-> 1.000000
xchgl A=fbca7654 B=12345678
xchgw A=12347654 B=fbca5678
xchgb A=12345654 B=fbca7678
Expand Down Expand Up @@ -4418,12 +4446,16 @@ psubsb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab
psubsb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=61e80000091e00f3f1063d8000000000
psubsw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e04ea6777a6263ea6f9800070c8
psubsw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=60e80000091efff3f1063d6800000000
pminsw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=dc511efb944ae1461f2923c69869d7ab
pminsw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=c233e9e8c4c9438d007c255a085427f8
por : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=ddf35effbdebf9ee5f6f7fcfd8fbdffb
por : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e33be9e8cdef439f0f7e67da085427f8
paddsb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1d437afa802d3932647f7f93f0231f1e
paddsb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54ed2d091b07f800f7f7f1c107f4ef0
paddsw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1e437bfa80003a3264907ffff123201e
paddsw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54ed3d092b07fff0ff27fff10a84ff0
pmaxsw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41f25cffa9e358ec45677ccd58ba4873
pmaxsw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=231be9e8cde7439a0f7662c2085427f8
pxor : a=456723c698694873 b=1f297ccd58bad7ab r=5a4e5f0bc0d39fd8
pxor : a=007c62c2085427f8 b=0f76255a085427f8 r=0f0a479800000000
pxor : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9da342043da9b9aa5a4e5f0bc0d39fd8
Expand Down Expand Up @@ -4581,6 +4613,14 @@ mulss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd
subps : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c27a666600000000c3af0000c22c0000
subss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999ac22c0000
divss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999a3d71fee1
cmpeqss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
cmpltss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
cmpless : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
cmpunordss: a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
cmpneqss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
cmpnltss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
cmpnless : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
cmpordss : a=c0c9999a408000004059999a402ccccd b=426133334080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
addpd : a=c00b333333333333400599999999999a b=c04ab333333333334046d9999999999a r=c04c6666666666664048333333333334
addsd : a=c00b333333333333400599999999999a b=c04ab333333333334046d9999999999a r=c00b3333333333334048333333333334
mulpd : a=c00b333333333333400599999999999a b=c04ab333333333334046d9999999999a r=4066b1eb851eb852405ed8f5c28f5c2a
Expand Down Expand Up @@ -4614,6 +4654,14 @@ mulss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd
subps : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=ffc0000000000000c3af0000ffc00000
subss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999affc00000
divss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999affc00000
cmpeqss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
cmpltss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
cmpless : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
cmpunordss: a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
cmpneqss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
cmpnltss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
cmpnless : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999affffffff
cmpordss : a=c0c9999a408000004059999affc00000 b=ffc000004080000043b0b3334236cccd r=c0c9999a408000004059999a00000000
addpd : a=c00b333333333333fff8000000000000 b=fff80000000000004046d9999999999a r=fff8000000000000fff8000000000000
addsd : a=c00b333333333333fff8000000000000 b=fff80000000000004046d9999999999a r=c00b333333333333fff8000000000000
mulpd : a=c00b333333333333fff8000000000000 b=fff80000000000004046d9999999999a r=fff8000000000000fff8000000000000
Expand Down
22 changes: 11 additions & 11 deletions tests/e2e/qemu/qemu-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,7 @@ void test_floats(void)
//test_fbcd(-123451234567890.0);
//test_fenv();
if (TEST_CMOV) {
//test_fcmov();
test_fcmov();
}
}

Expand Down Expand Up @@ -2466,11 +2466,11 @@ void test_sse(void)

SSE_OP2(psubsb);
SSE_OP2(psubsw);
// MMX_OP2(pminsw);
SSE_OP2(pminsw);
SSE_OP2(por);
SSE_OP2(paddsb);
SSE_OP2(paddsw);
// MMX_OP2(pmaxsw);
SSE_OP2(pmaxsw);
MMX_OP2(pxor);
MMX_OP2(pmuludq);
SSE_OP2(pmaddwd);
Expand Down Expand Up @@ -2625,14 +2625,14 @@ void test_sse(void)
SSE_OPS_S(div);
// SSE_OPS(max);
// SSE_OPS(sqrt);
// SSE_OPS(cmpeq);
// SSE_OPS(cmplt);
// SSE_OPS(cmple);
// SSE_OPS(cmpunord);
// SSE_OPS(cmpneq);
// SSE_OPS(cmpnlt);
// SSE_OPS(cmpnle);
// SSE_OPS(cmpord);
SSE_OPS_S(cmpeq);
SSE_OPS_S(cmplt);
SSE_OPS_S(cmple);
SSE_OPS_S(cmpunord);
SSE_OPS_S(cmpneq);
SSE_OPS_S(cmpnlt);
SSE_OPS_S(cmpnle);
SSE_OPS_S(cmpord);


a.d[0] = 2.7;
Expand Down

0 comments on commit c48d211

Please sign in to comment.