Skip to content

Commit

Permalink
Add packsswb, pavgb, pavgw, psubb, and psubd
Browse files Browse the repository at this point in the history
  • Loading branch information
jason-conway committed Aug 18, 2022
1 parent fae5f7e commit 1b07c20
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 15 deletions.
12 changes: 10 additions & 2 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -292,7 +292,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(unpackl_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x62: TRACEI("punpckldq xmm:modrm, xmm");
READMODRM; V_OP(unpackl_dq, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x63: TRACEI("packsswb xmm:modrm, xmm");
READMODRM; V_OP(packss_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x68: TRACEI("punpckhbw xmm:modrm, xmm");
READMODRM; V_OP(unpackh_bw, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x69: TRACEI("punpckhwd xmm:modrm, xmm");
Expand Down Expand Up @@ -387,7 +388,10 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(max_ub, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdf: TRACEI("pandn xmm:modrm, xmm");
READMODRM; V_OP(andn, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0xe0: TRACEI("pavgb xmm:modrm, xmm");
READMODRM; V_OP(avg_b, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe3: TRACEI("pavgw xmm:modrm, xmm");
READMODRM; V_OP(avg_w, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe4: TRACEI("pmulhuw xmm:modrm, xmm");
READMODRM; V_OP(muluu, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe5: TRACEI("pmulhw xmm:modrm, xmm");
Expand All @@ -406,8 +410,12 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(madd_d, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfb: TRACEI("psubq xmm:modrm, xmm");
READMODRM; V_OP(sub_q, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf8: TRACEI("psubb xmm:modrm, xmm");
READMODRM; V_OP(sub_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf9: TRACEI("psubw xmm:modrm, xmm");
READMODRM; V_OP(sub_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfa: TRACEI("psubd xmm:modrm, xmm");
READMODRM; V_OP(sub_d, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfc: TRACEI("paddb xmm:modrm, xmm");
READMODRM; V_OP(add_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfd: TRACEI("paddw xmm:modrm, xmm");
Expand Down
38 changes: 36 additions & 2 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,15 @@ static inline void zero_xmm(union xmm_reg *xmm) {
xmm->qw[1] = 0;
}

// Narrow a 16-bit word, passed zero-extended in dw, to a signed-saturated byte.
// Values up to 0x7f pass through, 0x80..0x7fff clamp to 0x7f, 0x8000..0xff80
// clamp to 0x80, and anything above 0xff80 keeps only its low byte.
static inline int32_t satw(int32_t dw) {
    if (dw <= 0x7f)
        return dw;
    if (dw <= 0x7fff)
        return 0x7f;
    if (dw <= 0xff80)
        return 0x80;
    return dw & 0xff;
}
static inline uint32_t satd(uint32_t dw) {
if (dw > 0xffff8000)
dw &= 0xffff;
Expand Down Expand Up @@ -58,7 +67,6 @@ void vec_imm_shiftl_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
dst->qw[1] <<= amount;
}
}

void vec_imm_shiftl_d128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
if (amount > 31) {
zero_xmm(dst);
Expand All @@ -85,7 +93,6 @@ void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
dst->qw[1] >>= amount;
}
}

void vec_imm_shiftr_d128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
if (amount > 31) {
zero_xmm(dst);
Expand Down Expand Up @@ -185,10 +192,18 @@ void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
// Add the single 64-bit lane of src into dst (dst = dst + src).
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
    dst->qw = dst->qw + src->qw;
}
// Subtract src from dst one byte lane at a time (dst = dst - src).
// Each difference is stored straight back into its lane; no saturation is applied.
void vec_sub_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    const unsigned lanes = array_size(src->u8);
    for (unsigned lane = 0; lane < lanes; lane++) {
        dst->u8[lane] = dst->u8[lane] - src->u8[lane];
    }
}
// Subtract src from dst one 16-bit lane at a time (dst = dst - src).
// Each difference is stored straight back into its lane; no saturation is applied.
void vec_sub_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    const unsigned lanes = array_size(src->u16);
    for (unsigned lane = 0; lane < lanes; lane++) {
        dst->u16[lane] = dst->u16[lane] - src->u16[lane];
    }
}
// Subtract src from dst one 32-bit lane at a time (dst = dst - src).
// Each difference is stored straight back into its lane; no saturation is applied.
void vec_sub_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    const unsigned lanes = array_size(src->u32);
    for (unsigned lane = 0; lane < lanes; lane++) {
        dst->u32[lane] = dst->u32[lane] - src->u32[lane];
    }
}
void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[0] -= src->qw[0];
dst->qw[1] -= src->qw[1];
Expand Down Expand Up @@ -407,6 +422,16 @@ void vec_unpackh_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->qw[1] = src->qw[1];
}

// Pack sixteen 16-bit lanes into sixteen bytes using satw().
// The eight words of dst become the bytes of dst->u32[0..1] and the eight
// words of src become the bytes of dst->u32[2..3]; within each dword the
// lowest-numbered word lands in the least significant byte.
void vec_packss_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Assemble the result in a temporary and store it at the end: src may be
    // the very same register as dst, and writing dst->u32[0..1] early would
    // clobber words that still have to be read through src.
    uint32_t packed[4] = {0, 0, 0, 0};
    for (unsigned lane = 0; lane < 8; lane++) {
        const unsigned slot = lane / 4;
        const unsigned shift = 8 * (lane % 4);
        // Shift as unsigned: a packed byte >= 0x80 moved into bits 24..31
        // does not fit in a signed 32-bit int, which would be undefined.
        packed[slot] |= (uint32_t) satw(dst->u16[lane]) << shift;
        packed[slot + 2] |= (uint32_t) satw(src->u16[lane]) << shift;
    }
    for (unsigned i = 0; i < 4; i++)
        dst->u32[i] = packed[i];
}
void vec_packss_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[0] = satd(dst->u32[0]) | (satd(dst->u32[1]) << 16);
dst->u32[1] = satd(dst->u32[2]) | (satd(dst->u32[3]) << 16);
Expand Down Expand Up @@ -465,6 +490,15 @@ void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t i
*dst = src->u16[index % 8];
}

// Rounding average of each of the 16 byte lanes: dst = (dst + src + 1) >> 1.
// The sum is formed before the shift, so the +1 rounds halves upward.
void vec_avg_b128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane != 16; lane++) {
        dst->u8[lane] = (dst->u8[lane] + src->u8[lane] + 1) >> 1;
    }
}
// Rounding average of each of the 8 word lanes: dst = (dst + src + 1) >> 1.
// The sum is formed before the shift, so the +1 rounds halves upward.
void vec_avg_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane != 8; lane++) {
        dst->u16[lane] = (dst->u16[lane] + src->u16[lane] + 1) >> 1;
    }
}

void vec_mull128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
for (int i = 0; i < 8; i++) {
dst->u16[i] = (uint16_t)(dst->u16[i] * src->u16[i]);
Expand Down
5 changes: 5 additions & 0 deletions emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ void vec_add_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst);
// Lane-wise subtraction on a 128-bit register, dst -= src, with the lane
// width given by the suffix: b = u8, w = u16, d = u32, q = qw lanes.
void vec_sub_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mulu_dq64(NO_CPU, union mm_reg *src, union mm_reg *dst);
Expand Down Expand Up @@ -94,6 +96,7 @@ void vec_cvttpd2dq64(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_cvttps2dq32(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

// TODO organize
// Pack 16-bit lanes to saturated bytes: dst's eight words fill dst->u32[0..1],
// src's eight words fill dst->u32[2..3].
void vec_packss_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_packss_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

void vec_unpackl_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
Expand All @@ -114,5 +117,7 @@ void vec_compare_eqd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_movmask_b128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index);
// Per-lane rounding average, dst = (dst + src + 1) >> 1, over the 16 byte
// lanes (avg_b) or the 8 word lanes (avg_w) of a 128-bit register.
void vec_avg_b128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_avg_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

#endif
10 changes: 10 additions & 0 deletions tests/e2e/qemu/expected.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4358,6 +4358,8 @@ punpcklwd: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab
punpcklwd: a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0f76007c255a62c20854085427f827f8
punpckldq: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1f297ccd456723c658bad7ab98694873
punpckldq: a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=0f76255a007c62c2085427f8085427f8
packsswb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=7f7f80807f7f7f80807f807f7f7f807f
packsswb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=8080807f7f7f7f7f7f80807f7c7f7f7f
punpckhbw: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41dcf2511e5cfbffa994e34ae15846ec
punpckhbw: a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=c223331be9e9e8e8c4cdc9e743439a8d
punpckhwd: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=41f2dc511efb5cffa9e3944ae14658ec
Expand Down Expand Up @@ -4408,8 +4410,12 @@ pmuludq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab
pmuludq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=9e46f0ab618189d200455e29c0fd8040
pmaddwd : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=020fe597198f142619e3240dd0aece1b
pmaddwd : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=f96e96a11d6d10310e703f5c0682dbd0
psubb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9b5f3e04eb6777a6263ea7f940af71c8
psubb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=61e80000091e00f3f1063d6800000000
psubw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e04ea6777a6263ea6f93faf70c8
psubw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=60e80000091efff3f1063d6800000000
psubd : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e04ea6677a6263da6f93fae70c8
psubd : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=60e80000091dfff3f1063d6800000000
psubq : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=9a5f3e03ea6677a6263da6f93fae70c8
psubq : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=60e80000091dfff3f1063d6800000000
paddb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1d437afa3d2d393264909f93f0231f1e
Expand All @@ -4418,6 +4424,10 @@ paddw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab
paddw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54ed3d092b087270ff2881c10a84ff0
paddd : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1e437bfa3e2e3a326490a093f124201e
paddd : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=e54fd3d092b087270ff2881c10a84ff0
pavgb : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=8fa23dfd9f979d99324850ca7892908f
pavgb : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=7327e9e8c9d843940879448e085427f8
pavgw : a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=8f223dfd9f179d193248504a7892900f
pavgw : a=231be9e8cde7438d007c62c2085427f8 b=c233e9e8c4c9439a0f76255a085427f8 r=72a7e9e8c958439407f9440e085427f8
pextrw : r=0000944a
pmovmskb : r=00009918
punpcklqdq: a=dc515cff944a58ec456723c698694873 b=41f21efba9e3e1461f297ccd58bad7ab r=1f297ccd58bad7ab456723c698694873
Expand Down
27 changes: 16 additions & 11 deletions tests/e2e/qemu/qemu-test.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,15 @@
#define CC_S 0x0080
#define CC_O 0x0800

#define __init_call __attribute__ ((unused,__section__ ("initcall")))
/* Start/stop symbols for the "initcall" section, and the __init_call
 * attribute that places an object into that section. */
#ifdef __APPLE__
/* Apple spelling: the section name carries a segment prefix
 * ("__DATA,initcall") and the boundary symbols are bound through asm labels
 * to section$start$... / section$end$... names.
 * NOTE(review): those names are presumably synthesized by the Apple linker
 * -- confirm against the ld documentation. */
extern void *__start_initcall asm("section$start$__DATA$initcall");
extern void *__stop_initcall asm("section$end$__DATA$initcall");
#define __init_call __attribute__ ((unused,__section__("__DATA,initcall")))
#else
/* Everywhere else the plain section name and plain symbol names are used.
 * NOTE(review): presumably relies on the linker defining
 * __start_<section> / __stop_<section> automatically -- confirm. */
extern void *__start_initcall;
extern void *__stop_initcall;
#define __init_call __attribute__ ((unused,__section__ ("initcall")))
#endif

#define CC_MASK (CC_C | CC_P | CC_Z | CC_S | CC_O | CC_A)

Expand Down Expand Up @@ -1694,6 +1702,7 @@ void test_vm86(void)
}
#endif

#ifndef __APPLE__
/* exception tests */
#if defined(__i386__) && !defined(REG_EAX)
#define REG_EAX EAX
Expand Down Expand Up @@ -2104,7 +2113,7 @@ static void test_enter(void)
TEST_ENTER("w", uint16_t, 2);
TEST_ENTER("w", uint16_t, 31);
}

#endif
#ifdef TEST_SSE

typedef int __m64 __attribute__ ((__mode__ (__V2SI__)));
Expand Down Expand Up @@ -2428,7 +2437,7 @@ void test_sse(void)
SSE_OP2(punpcklbw);
SSE_OP2(punpcklwd);
SSE_OP2(punpckldq);
// MMX_OP2(packsswb);
SSE_OP2(packsswb);
// MMX_OP2(pcmpgtb);
// MMX_OP2(pcmpgtw);
// MMX_OP2(pcmpgtd);
Expand Down Expand Up @@ -2466,16 +2475,16 @@ void test_sse(void)
MMX_OP2(pmuludq);
SSE_OP2(pmaddwd);
// MMX_OP2(psadbw);
// MMX_OP2(psubb);
SSE_OP2(psubb);
SSE_OP2(psubw);
// MMX_OP2(psubd);
SSE_OP2(psubd);
SSE_OP2(psubq);
SSE_OP2(paddb);
SSE_OP2(paddw);
SSE_OP2(paddd);

// MMX_OP2(pavgb);
// MMX_OP2(pavgw);
SSE_OP2(pavgb);
SSE_OP2(pavgw);

// asm volatile ("pinsrw $1, %1, %0" : "=y" (r.q[0]) : "r" (0x12345678));
// printf("%-9s: r=" FMT64X "\n", "pinsrw", r.q[0]);
Expand Down Expand Up @@ -2751,10 +2760,6 @@ void test_conv(void)
#endif
}

extern void *__start_initcall;
extern void *__stop_initcall;


int main(int argc, char **argv)
{
void **ptr;
Expand Down

0 comments on commit 1b07c20

Please sign in to comment.