Skip to content

Commit

Permalink
Instructions unclear
Browse files Browse the repository at this point in the history
  • Loading branch information
jason-conway committed Aug 16, 2022
1 parent 5788a11 commit f46fa8f
Show file tree
Hide file tree
Showing 5 changed files with 229 additions and 34 deletions.
47 changes: 40 additions & 7 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -287,12 +287,24 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0x5c: TRACEI("subpd xmm:modrm, xmm");
READMODRM; V_OP(sub_p, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x60: TRACEI("punpcklbw xmm:modrm, xmm");
READMODRM; V_OP(unpack_bw, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(unpackl_bw, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x61: TRACEI("punpcklwd xmm:modrm, xmm");
READMODRM; V_OP(unpackl_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x62: TRACEI("punpckldq xmm:modrm, xmm");
READMODRM; V_OP(unpack_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(unpackl_dq, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x68: TRACEI("punpckhbw xmm:modrm, xmm");
READMODRM; V_OP(unpackh_bw, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x69: TRACEI("punpckhwd xmm:modrm, xmm");
READMODRM; V_OP(unpackh_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x6a: TRACEI("punpckhdq xmm:modrm, xmm");
READMODRM; V_OP(unpackh_d, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x6b: TRACEI("packssdw xmm:modrm, xmm");
READMODRM; V_OP(packss_d, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x6c: TRACEI("punpcklqdq xmm:modrm, xmm");
READMODRM; V_OP(unpack_qdq, xmm_modrm_val, xmm_modrm_reg,128); break;

READMODRM; V_OP(unpackl_qdq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x6d: TRACEI("punpckhqdq xmm:modrm, xmm");
READMODRM; V_OP(unpackh_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x6e: TRACEI("movd modrm, xmm");
READMODRM; VMOV(modrm_val, xmm_modrm_reg,32); break;

Expand All @@ -301,11 +313,21 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {

case 0x70: TRACEI("pshufd xmm:modrm, xmm, imm8");
READMODRM; READIMM8; V_OP_IMM(shuffle_d, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x71: READMODRM_NOMEM;
switch (modrm.opcode) {
case 4: TRACEI("psraw imm, xmm");
READIMM8; V_OP(imm_shiftrs_w, imm, xmm_modrm_reg, 128); break;
case 6: TRACEI("psllw imm, xmm");
READIMM8; V_OP(imm_shiftl_w, imm, xmm_modrm_reg, 128); break;
default: UNDEFINED;
}
break;
case 0x72: READMODRM_NOMEM;
switch (modrm.opcode) {
case 2: TRACEI("psrld imm, xmm");
READIMM8; V_OP(imm_shiftr_d, imm, xmm_modrm_reg, 128); break;
case 4: TRACEI("psrad imm, xmm");
READIMM8; V_OP(imm_shiftrs_d, imm, xmm_modrm_reg, 128); break;
case 6: TRACEI("pslld imm, xmm");
READIMM8; V_OP(imm_shiftl_d, imm, xmm_modrm_reg, 128); break;
default: UNDEFINED;
Expand All @@ -315,6 +337,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
switch (modrm.opcode) {
case 2: TRACEI("psrlq imm, xmm");
READIMM8; V_OP(imm_shiftr_q, imm, xmm_modrm_reg, 128); break;
case 3: TRACEI("psrldq imm, xmm");
READIMM8; V_OP(imm_shiftr_dq, imm, xmm_modrm_reg, 128); break;
case 6: TRACEI("psllq imm, xmm");
READIMM8; V_OP(imm_shiftl_q, imm, xmm_modrm_reg, 128); break;
case 7: TRACEI("pslldq imm, xmm");
Expand All @@ -325,6 +349,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {

case 0x74: TRACEI("pcmpeqb xmm:modrm, xmm");
READMODRM; V_OP(compare_eqb, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x75: TRACEI("pcmpeqw xmm:modrm, xmm");
READMODRM; V_OP(compare_eqw, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x76: TRACEI("pcmpeqd xmm:modrm, xmm");
READMODRM; V_OP(compare_eqd, xmm_modrm_val, xmm_modrm_reg,128); break;

Expand Down Expand Up @@ -360,9 +386,10 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0xdf: TRACEI("pandn xmm:modrm, xmm");
READMODRM; V_OP(andn, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0xe4: TRACEI("pmulhuw xmm:modrm, xmm");
READMODRM; V_OP(muluu, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe5: TRACEI("pmulhw xmm:modrm, xmm");
READMODRM; V_OP(mulu, xmm_modrm_val, xmm_modrm_reg, 128); break;

case 0xe6: TRACEI("cvttpd2dq xmm:modrm, xmm");
READMODRM; V_OP(cvttpd2dq, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0xeb: TRACEI("por xmm:modrm, xmm");
Expand All @@ -373,10 +400,16 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(shiftl_q, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf4: TRACEI("pmuludq xmm:modrm, xmm");
READMODRM; V_OP(mulu_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf5: TRACEI("pmaddwd xmm:modrm, xmm");
READMODRM; V_OP(madd_d, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfb: TRACEI("psubq xmm:modrm, xmm");
READMODRM; V_OP(sub_q, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf9: TRACEI("psubw xmm:modrm, xmm");
READMODRM; V_OP(sub_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfc: TRACEI("paddb xmm:modrm, xmm");
READMODRM; V_OP(add_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfd: TRACEI("paddw xmm:modrm, xmm");
READMODRM; V_OP(add_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xfe: TRACEI("paddd xmm:modrm, xmm");
READMODRM; V_OP(add_d, xmm_modrm_val, xmm_modrm_reg,128); break;
#else
Expand Down Expand Up @@ -407,7 +440,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(sub_p, xmm_modrm_val, xmm_modrm_reg,32); break;

case 0x62: TRACEI("punpckldq mm:modrm, mm");
READMODRM; V_OP(unpack_dq, mm_modrm_val, mm_modrm_reg,64); break;
READMODRM; V_OP(unpackl_dq, mm_modrm_val, mm_modrm_reg,64); break;

case 0x6e: TRACEI("movd modrm, mm");
READMODRM; VMOV(modrm_val, mm_modrm_reg,32); break;
Expand Down
116 changes: 112 additions & 4 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,16 @@ static inline void zero_xmm(union xmm_reg *xmm) {
xmm->qw[1] = 0;
}

// Saturate a 32-bit value, read as a signed dword, to the signed 16-bit range.
// The clamped word is returned in the low 16 bits with the upper bits clear.
static inline uint32_t satd(uint32_t dw) {
    // Negative but still above -32768: representable, keep the low word.
    if (dw > 0xffff8000)
        return dw & 0xffff;
    // Every remaining value with the sign bit set clamps to -32768.
    if (dw > 0x7fffffff)
        return 0x8000;
    // Positive values above 32767 clamp to 32767.
    if (dw > 0x7fff)
        return 0x7fff;
    return dw;
}

#define VEC_ZERO_COPY(zero, copy) \
void vec_zero##zero##_copy##copy(NO_CPU, const void *src, void *dst) { \
memcpy(dst, src, copy/8); \
Expand All @@ -31,6 +41,15 @@ void vec_merge128(NO_CPU, const void *src, void *dst) {
memcpy(dst, src, 16);
}

// Shift each of the eight 16-bit words of dst left by an immediate count.
// A count of 16 or more clears the whole register.
void vec_imm_shiftl_w128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
    if (amount >= 16) {
        zero_xmm(dst);
        return;
    }
    for (int lane = 0; lane < 8; lane++)
        dst->u16[lane] = dst->u16[lane] << amount;
}
void vec_imm_shiftl_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
if (amount > 63) {
zero_xmm(dst);
Expand Down Expand Up @@ -92,6 +111,13 @@ void vec_imm_shiftl_dq128(NO_CPU, uint8_t amount, union xmm_reg *dst) {
dst->u128 <<= amount * 8;
}

// Shift the whole 128-bit register right by `amount` bytes.
// A count of 16 or more clears the register.
void vec_imm_shiftr_dq128(NO_CPU, uint8_t amount, union xmm_reg *dst) {
    if (amount < 16) {
        // The immediate counts bytes; the shift operator counts bits.
        dst->u128 = dst->u128 >> (amount * 8);
        return;
    }
    zero_xmm(dst);
}

void vec_shiftl_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst) {
uint64_t amount_qw = amount->qw[0];

Expand All @@ -114,10 +140,31 @@ void vec_shiftr_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst) {
}
}

// Signed right shift of each 16-bit word of dst by an immediate count.
// Counts above 15 fill every word with copies of its sign bit.
void vec_imm_shiftrs_w128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
    const int saturated = amount > 15;
    for (unsigned lane = 0; lane < 8; lane++) {
        const uint16_t word = dst->u16[lane];
        if (saturated) {
            // Only the sign bit survives an over-wide shift.
            dst->u16[lane] = (word & 0x8000) ? 0xffff : 0;
            continue;
        }
        dst->u16[lane] = (int16_t) word >> amount;
    }
}
// Signed right shift of each 32-bit dword of dst by an immediate count.
// Counts above 31 fill every dword with copies of its sign bit.
void vec_imm_shiftrs_d128(NO_CPU, const uint8_t amount, union xmm_reg *dst) {
    const int saturated = amount > 31;
    for (unsigned lane = 0; lane < 4; lane++) {
        const uint32_t dword = dst->u32[lane];
        if (saturated) {
            // Only the sign bit survives an over-wide shift.
            dst->u32[lane] = (dword & 0x80000000) ? 0xffffffff : 0;
            continue;
        }
        dst->u32[lane] = (int32_t) dword >> amount;
    }
}

// Lane-wise wrapping add of the byte lanes of src into dst.
void vec_add_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    const unsigned lanes = array_size(src->u8);
    for (unsigned lane = 0; lane < lanes; lane++)
        dst->u8[lane] = dst->u8[lane] + src->u8[lane];
}
// Lane-wise wrapping add of the 16-bit word lanes of src into dst.
void vec_add_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    const unsigned lanes = array_size(src->u16);
    for (unsigned lane = 0; lane < lanes; lane++)
        dst->u16[lane] = dst->u16[lane] + src->u16[lane];
}
void vec_add_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u32); i++)
dst->u32[i] += src->u32[i];
Expand All @@ -129,11 +176,26 @@ void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
// Add the single quadword of src into the quadword of dst.
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst) {
    dst->qw = dst->qw + src->qw;
}
// Lane-wise wrapping subtract of the 16-bit word lanes of src from dst.
void vec_sub_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    const unsigned lanes = array_size(src->u16);
    for (unsigned lane = 0; lane < lanes; lane++)
        dst->u16[lane] = dst->u16[lane] - src->u16[lane];
}
// Subtract each of the two quadwords of src from the matching quadword of dst.
void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned lane = 0; lane < 2; lane++)
        dst->qw[lane] = dst->qw[lane] - src->qw[lane];
}

// Multiply the eight 16-bit words of dst and src pairwise as signed values,
// then add each adjacent pair of products into one dword of dst (pmaddwd).
//
// The pair is summed in 64 bits and truncated to 32. When all four words of a
// pair are 0x8000 the sum is 2^31, which does not fit in a signed int, so
// adding the two products as ints would be undefined behavior.
void vec_madd_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
    for (unsigned i = 0; i < 4; i++) {
        // Read both word pairs before storing dword i; the union's dword lane
        // presumably aliases exactly these two words of dst.
        const int64_t even = (int64_t) (int16_t) dst->u16[2 * i] * (int16_t) src->u16[2 * i];
        const int64_t odd = (int64_t) (int16_t) dst->u16[2 * i + 1] * (int16_t) src->u16[2 * i + 1];
        dst->u32[i] = (uint32_t) (even + odd);
    }
}

void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst) {
dst->qw[0] = (uint64_t) src->u32[0] * dst->u32[0];
dst->qw[1] = (uint64_t) src->u32[2] * dst->u32[2];
Expand Down Expand Up @@ -290,23 +352,58 @@ VEC_CVT(ss2sd32, float, double)
PACKED_VEC_CVT(tpd2dq64, f64, u32, double, int32_t, 2)
PACKED_VEC_CVT(tps2dq32, f32, u32, float, int32_t, 4)

void vec_unpack_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
// Interleave the eight lowest bytes of dst and src: dst bytes land in the even
// positions and src bytes in the odd positions of the result.
void vec_unpackl_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Walk from the top lane down so no dst byte is overwritten before it is read.
    int lane = 8;
    while (lane-- > 0) {
        const int even = lane * 2;
        dst->u8[even + 1] = src->u8[lane];
        dst->u8[even] = dst->u8[lane];
    }
}
void vec_unpack_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
// Interleave the four lowest 16-bit words of dst and src: dst words land in the
// even positions and src words in the odd positions of the result.
void vec_unpackl_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Walk from the top lane down so no dst word is overwritten before it is read.
    int lane = 4;
    while (lane-- > 0) {
        const int even = lane * 2;
        dst->u16[even + 1] = src->u16[lane];
        dst->u16[even] = dst->u16[lane];
    }
}
// Interleave the two lowest dwords of dst and src; dst dword 0 stays in place.
void vec_unpackl_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Capture every input up front so the stores below cannot disturb them.
    const uint32_t src_low = src->u32[0];
    const uint32_t src_high = src->u32[1];
    const uint32_t dst_high = dst->u32[1];

    dst->u32[1] = src_low;
    dst->u32[2] = dst_high;
    dst->u32[3] = src_high;
}
void vec_unpack_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
// Place the low dword of src in the high dword of dst; dst's low dword is kept.
void vec_unpackl_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
    const uint32_t src_low = src->dw[0];
    dst->dw[1] = src_low;
}
void vec_unpack_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
// Place the low quadword of src in the high quadword of dst; dst's low quadword
// is kept.
void vec_unpackl_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    const uint64_t src_low = src->qw[0];
    dst->qw[1] = src_low;
}
// Interleave the eight highest bytes of dst and src: dst bytes land in the even
// positions and src bytes in the odd positions of the result.
void vec_unpackh_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Ascending order is safe here: the write position 2*pair never passes the
    // read position pair+8 within the eight iterations.
    for (int pair = 0; pair < 8; pair++) {
        const int high = pair + 8;
        const int out = pair * 2;
        dst->u8[out] = dst->u8[high];
        dst->u8[out + 1] = src->u8[high];
    }
}
// Interleave the four highest 16-bit words of dst and src: dst words land in
// the even positions and src words in the odd positions of the result.
void vec_unpackh_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Ascending order is safe here: the write position 2*pair never passes the
    // read position pair+4 within the four iterations.
    for (int pair = 0; pair < 4; pair++) {
        const int high = pair + 4;
        const int out = pair * 2;
        dst->u16[out] = dst->u16[high];
        dst->u16[out + 1] = src->u16[high];
    }
}
// Interleave the two highest dwords of dst and src: dst dwords land in the even
// positions and src dwords in the odd positions of the result.
void vec_unpackh_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Same store order as writing lanes 0, 1, 2, 3 one after another.
    for (int pair = 0; pair < 2; pair++) {
        const int high = pair + 2;
        const int out = pair * 2;
        dst->u32[out] = dst->u32[high];
        dst->u32[out + 1] = src->u32[high];
    }
}
// Move the high quadword of dst to its low quadword, then place the high
// quadword of src in dst's high quadword.
void vec_unpackh_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    const uint64_t src_high = src->qw[1];
    dst->qw[0] = dst->qw[1];
    dst->qw[1] = src_high;
}

// Pack the four dwords of dst and the four dwords of src into eight 16-bit
// words using signed saturation (satd). dst supplies the low four words of the
// result and src the high four.
void vec_packss_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    // Snapshot src before writing dst. Nothing stops both pointers from naming
    // the same register (an instruction with identical operands), and the
    // first two stores would then clobber the dwords read for the upper half.
    const union xmm_reg src_copy = *src;

    // dst->u32[2] and [3] are still untouched when the second store reads them.
    dst->u32[0] = satd(dst->u32[0]) | (satd(dst->u32[1]) << 16);
    dst->u32[1] = satd(dst->u32[2]) | (satd(dst->u32[3]) << 16);
    dst->u32[2] = satd(src_copy.u32[0]) | (satd(src_copy.u32[1]) << 16);
    dst->u32[3] = satd(src_copy.u32[2]) | (satd(src_copy.u32[3]) << 16);
}

void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding) {
union xmm_reg src_copy = *src;
Expand All @@ -324,6 +421,10 @@ void vec_compare_eqb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u8); i++)
dst->u8[i] = dst->u8[i] == src->u8[i] ? ~0 : 0;
}
// Compare the 16-bit word lanes of dst and src for equality. Each dst lane
// becomes all ones when the lanes match and zero otherwise.
void vec_compare_eqw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    const unsigned lanes = array_size(src->u16);
    for (unsigned lane = 0; lane < lanes; lane++) {
        if (dst->u16[lane] == src->u16[lane])
            dst->u16[lane] = ~0;
        else
            dst->u16[lane] = 0;
    }
}
void vec_compare_eqd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
for (unsigned i = 0; i < array_size(src->u32); i++)
dst->u32[i] = dst->u32[i] == src->u32[i] ? ~0 : 0;
Expand Down Expand Up @@ -380,3 +481,10 @@ void vec_mulu64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
}
dst->qw = d.qw;
}

// Unsigned multiply of each 16-bit word of dst by the matching word of src,
// keeping the high 16 bits of every 32-bit product (the decoder uses this for
// pmulhuw).
void vec_muluu128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
    for (int i = 0; i < 8; i++) {
        // Widen before multiplying: 16-bit operands promote to int, and
        // 0xffff * 0xffff does not fit in a 32-bit signed int (undefined
        // behavior). The unsigned 32-bit product is always exact.
        const uint32_t product = (uint32_t) dst->u16[i] * src->u16[i];
        dst->u16[i] = (uint16_t) (product >> 16);
    }
}
24 changes: 20 additions & 4 deletions emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,26 +18,34 @@ void vec_merge32(NO_CPU, const void *src, void *dst);
void vec_merge64(NO_CPU, const void *src, void *dst);
void vec_merge128(NO_CPU, const void *src, void *dst);

void vec_imm_shiftl_w128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftl_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftl_d128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftl_q64(NO_CPU, const uint8_t amount, union mm_reg *dst);
void vec_imm_shiftr_q128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftr_d128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftr_q64(NO_CPU, const uint8_t amount, union mm_reg *dst);
void vec_imm_shiftl_dq128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftr_dq128(NO_CPU, uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftrs_w128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_imm_shiftrs_d128(NO_CPU, const uint8_t amount, union xmm_reg *dst);
void vec_shiftl_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst);
void vec_shiftr_q128(NO_CPU, union xmm_reg *amount, union xmm_reg *dst);
void vec_add_b128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_q64(NO_CPU, union mm_reg *src, union mm_reg *dst);
void vec_sub_w128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_sub_q128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mulu_dq128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_mulu_dq64(NO_CPU, union mm_reg *src, union mm_reg *dst);
void vec_mulu64(NO_CPU, const union mm_reg *src, union mm_reg *dst);
void vec_mull64(NO_CPU, const union mm_reg *src, union mm_reg *dst);
void vec_mulu128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_muluu128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_mull128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_madd_d128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);

void vec_add_p64(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
void vec_add_p32(NO_CPU, union xmm_reg *src, union xmm_reg *dst);
Expand Down Expand Up @@ -85,13 +93,21 @@ void vec_cvttpd2dq64(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_cvttps2dq32(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

// TODO organize
void vec_unpack_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpack_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpack_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst);
void vec_unpack_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_packss_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

// Unpack-low family: interleave the lower-indexed lanes of dst and src in
// place. dst supplies the even result lanes and src the odd ones.
void vec_unpackl_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackl_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackl_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackl_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst);
void vec_unpackl_qdq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
// Unpack-high family: the same interleave, taken from the higher-indexed lanes.
// Naming note: _d128 interleaves dwords and _dq128 interleaves quadwords.
void vec_unpackh_bw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_compare_eqb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compare_eqw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compare_eqd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_movmask_b128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
Expand Down
Loading

0 comments on commit f46fa8f

Please sign in to comment.