Skip to content

Commit

Permalink
Merge pull request ish-app#2044 from jason-conway/shrinkflation
Browse files Browse the repository at this point in the history
  • Loading branch information
tbodt authored Feb 22, 2023
2 parents 4f5052a + 3c04ac1 commit f8a41ec
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 317 deletions.
18 changes: 9 additions & 9 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,13 +289,13 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(fmovmask_d, xmm_modrm_val, modrm_reg,128); break;

case 0x54: TRACEI("andpd xmm:modrm, xmm");
READMODRM; V_OP(and, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(and_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x55: TRACEI("andnpd xmm:modrm, xmm");
READMODRM; V_OP(andn, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x56: TRACEI("orpd xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(or_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x57: TRACEI("xorpd xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(xor_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x58: TRACEI("addpd xmm:modrm, xmm");
READMODRM; V_OP(add_p, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x59: TRACEI("mulpd xmm:modrm, xmm");
Expand Down Expand Up @@ -413,7 +413,7 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0xda: TRACEI("pminub xmm:modrm, xmm");
READMODRM; V_OP(min_ub, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdb: TRACEI("pand xmm:modrm, xmm");
READMODRM; V_OP(and, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(and_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdc: TRACEI("paddusb xmm:modrm, xmm");
READMODRM; V_OP(addus_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xdd: TRACEI("paddusw xmm:modrm, xmm");
Expand Down Expand Up @@ -445,15 +445,15 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
case 0xea: TRACEI("pminsw xmm:modrm, xmm");
READMODRM; V_OP(mins_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xeb: TRACEI("por xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(or_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xec: TRACEI("paddsb xmm:modrm, xmm");
READMODRM; V_OP(addss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xed: TRACEI("paddsw xmm:modrm, xmm");
READMODRM; V_OP(addss_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xee: TRACEI("pmaxsw xmm:modrm, xmm");
READMODRM; V_OP(maxs_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xef: TRACEI("pxor xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(xor_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf1: TRACEI("psllw xmm:modrm, xmm");
READMODRM; V_OP(shiftl_w, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xf2: TRACEI("pslld xmm:modrm, xmm");
Expand Down Expand Up @@ -503,13 +503,13 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,32); break;

case 0x54: TRACEI("andps xmm:modrm, xmm");
READMODRM; V_OP(and, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(and_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x55: TRACEI("andnps xmm:modrm, xmm");
READMODRM; V_OP(andn, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x56: TRACEI("orps xmm:modrm, xmm");
READMODRM; V_OP(or, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(or_dq, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x57: TRACEI("xorps xmm:modrm, xmm");
READMODRM; V_OP(xor, xmm_modrm_val, xmm_modrm_reg,128); break;
READMODRM; V_OP(xor_dq, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x58: TRACEI("addps xmm:modrm, xmm");
READMODRM; V_OP(add_p, xmm_modrm_val, xmm_modrm_reg,32); break;
Expand Down
139 changes: 34 additions & 105 deletions emu/mmx.c
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,41 @@ union vec {

// Generates vec_<name>_<suffix>64(src, dst): a lane-wise binary operation on
// two 64-bit MMX operands.
//
// Both operands are copied into `union vec` temporaries, every `size`-bit lane
// of the destination copy is updated with `d.u<size>[i] op= s.u<size>[i]`, and
// the combined 64-bit result is stored back to dst->qw. src is only read.
//
// NOTE(review): the two adjacent initializer lines below look like the
// before/after forms of a single line from a diff view (they differ only by a
// space before `}`); presumably only the second exists in the real file.
#define VEC_MMX_OP(name, suffix, op, size) \
void vec_##name##_##suffix##64(NO_CPU, const union mm_reg *src, union mm_reg *dst) { \
union vec s = { .qw = src->qw}, d = { .qw = dst->qw }; \
union vec s = { .qw = src->qw }, d = { .qw = dst->qw }; \
for (unsigned i = 0; i < array_size(s.u##size); i++) \
d.u##size[i] op##= s.u##size[i]; \
dst->qw = d.qw; \
}

// Generates vec_compare<sgn>_<suffix>64(src, dst): a lane-wise comparison of
// two 64-bit MMX operands that produces a per-lane mask.
//
// For every `size`-bit lane, the destination lane and the source lane are each
// cast to `<usgn>int<size>_t` and compared as `dst relop src`; the destination
// lane becomes ~0 (all bits set) when the comparison holds and 0 otherwise.
// `usgn` therefore selects the signedness of the comparison (`u` gives
// uint<size>_t, empty gives int<size>_t), and `sgn` is only pasted into the
// generated function name.
//
// NOTE(review): the two adjacent initializer lines below look like the
// before/after forms of a single line from a diff view (they differ only by a
// space before `}`); presumably only the second exists in the real file.
#define _VEC_MMX_CMP(sgn, usgn, suffix, relop, size) \
void vec_compare##sgn##_##suffix##64(NO_CPU, const union mm_reg *src, union mm_reg *dst) { \
union vec s = { .qw = src->qw}, d = { .qw = dst->qw }; \
union vec s = { .qw = src->qw }, d = { .qw = dst->qw }; \
for (unsigned i = 0; i < array_size(s.u##size); i++) \
d.u##size[i] = (usgn##int##size##_t)d.u##size[i] relop (usgn##int##size##_t)s.u##size[i] ? ~0 : 0;\
dst->qw = d.qw; \
}

// Shared body for the 64-bit logical shift helpers.
//
// Expects two names to be in scope at the expansion site:
//   amount - the shift count
//   dst    - pointer to the union mm_reg being shifted in place
//
// A count of `size` or more clears the whole register. Otherwise each
// `size`-bit lane of *dst is shifted independently with `op=`.
#define _SHIFT(op, size) \
    do { \
        union vec shifted = { .qw = 0 }; \
        if (!unlikely(amount >= (size))) { \
            shifted.qw = dst->qw; \
            for (unsigned lane = 0; lane < array_size(shifted.u##size); lane++) \
                shifted.u##size[lane] op##= amount; \
        } \
        dst->qw = shifted.qw; \
    } while (0)

// Generates the two 64-bit MMX logical-shift entry points for one lane width:
//
//   vec_shift<dir>_<suffix>64(src, dst)         - count is read from src->qw
//   vec_imm_shift<dir>_<suffix>64(amount, dst)  - count is an 8-bit immediate
//
// Both bodies expand _SHIFT(op, size), which picks up `amount` and `dst` from
// the enclosing function scope.
//
// The register form keeps the count as a full 64-bit value. Narrowing it to
// uint8_t would drop the upper bits before the range check in _SHIFT runs, so
// a count such as 0x100 would be treated as 0 and leave *dst unchanged instead
// of clearing it.
#define VEC_MMX_SHIFT(dir, suffix, op, size) \
void vec_shift##dir##_##suffix##64(NO_CPU, const union mm_reg *src, union mm_reg *dst) { \
    const uint64_t amount = src->qw; \
    _SHIFT(op, size); \
} \
void vec_imm_shift##dir##_##suffix##64(NO_CPU, const uint8_t amount, union mm_reg *dst) { \
    _SHIFT(op, size); \
}

// Comparison generator that expands _VEC_MMX_CMP with an empty name infix and
// the `u` type prefix: the resulting function is vec_compare_<suffix>64 and
// its lanes are cast to uint<size>_t before `relop` is applied.
#define VEC_MMX_CMPD(suffix, relop, size) \
_VEC_MMX_CMP(, u, suffix, relop, size)
#define VEC_MMX_CMPS(suffix, relop, size) \
Expand Down Expand Up @@ -62,105 +83,13 @@ VEC_MMX_CMPS(gtb, >, 8)
VEC_MMX_CMPS(gtw, >, 16)
VEC_MMX_CMPS(gtd, >, 32)

void vec_shiftl_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
if (src->qw > 15) {
dst->qw = 0;
} else {
union vec d = { .qw = dst->qw};
for (unsigned i = 0; i < array_size(d.u16); i++)
d.u16[i] <<= src->qw;
dst->qw = d.qw;
}
}
void vec_shiftl_d64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
if (src->qw > 31) {
dst->qw = 0;
} else {
dst->dw[0] <<= src->qw;
dst->dw[1] <<= src->qw;
}
}
void vec_shiftl_q64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
if (src->qw > 63)
dst->qw = 0;
else
dst->qw <<= src->qw;
}

void vec_shiftr_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
if (src->qw > 15) {
dst->qw = 0;
} else {
union vec d = { .qw = dst->qw};
for (unsigned i = 0; i < array_size(d.u16); i++)
d.u16[i] >>= src->qw;
dst->qw = d.qw;
}
}
void vec_shiftr_d64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
if (src->qw > 31) {
dst->qw = 0;
} else {
dst->dw[0] >>= src->qw;
dst->dw[1] >>= src->qw;
}
}
void vec_shiftr_q64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
if (src->qw > 63)
dst->qw = 0;
else
dst->qw >>= src->qw;
}

void vec_imm_shiftl_w64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
if (amount > 15) {
dst->qw = 0;
} else {
union vec d = { .qw = dst->qw};
for (unsigned i = 0; i < array_size(d.u16); i++)
d.u16[i] <<= amount;
dst->qw = d.qw;
}
}
void vec_imm_shiftl_d64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
if (amount > 31) {
dst->qw = 0;
} else {
dst->dw[0] <<= amount;
dst->dw[1] <<= amount;
}
}
void vec_imm_shiftl_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
if (amount > 63)
dst->qw = 0;
else
dst->qw <<= amount;
}
// Right shifts (>>) on 16-, 32- and 64-bit lanes. Each line defines both
// vec_shiftr_<w|d|q>64 (count from a register) and vec_imm_shiftr_<w|d|q>64
// (immediate count).
VEC_MMX_SHIFT(r, w, >>, 16)
VEC_MMX_SHIFT(r, d, >>, 32)
VEC_MMX_SHIFT(r, q, >>, 64)

void vec_imm_shiftr_w64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
if (amount > 15) {
dst->qw = 0;
} else {
union vec d = { .qw = dst->qw};
for (unsigned i = 0; i < array_size(d.u16); i++)
d.u16[i] >>= amount;
dst->qw = d.qw;
}
}
void vec_imm_shiftr_d64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
if (amount > 31) {
dst->qw = 0;
} else {
dst->dw[0] >>= amount;
dst->dw[1] >>= amount;
}
}
void vec_imm_shiftr_q64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
if (amount > 63)
dst->qw = 0;
else
dst->qw >>= amount;
}
// Left shifts (<<) on 16-, 32- and 64-bit lanes. Each line defines both
// vec_shiftl_<w|d|q>64 (count from a register) and vec_imm_shiftl_<w|d|q>64
// (immediate count).
VEC_MMX_SHIFT(l, w, <<, 16)
VEC_MMX_SHIFT(l, d, <<, 32)
VEC_MMX_SHIFT(l, q, <<, 64)

void vec_shiftrs_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
union vec d = { .qw = dst->qw };
Expand Down Expand Up @@ -206,15 +135,15 @@ void vec_imm_shiftrs_d64(NO_CPU, const uint8_t amount, union mm_reg *dst) {
}

// Multiplies the four 16-bit lanes of dst by the matching lanes of src and
// keeps only the upper 16 bits of each 32-bit product in dst.
//
// NOTE(review): both lanes are cast to int16_t before the multiply, so the
// product is a signed one even though the function name ends in "u" - confirm
// against the instruction this helper is dispatched for.
// NOTE(review): the two adjacent initializer lines below look like the
// before/after forms of a single line from a diff view (they differ only by a
// space before `}`); presumably only the second exists in the real file.
void vec_mulu64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
union vec s = { .qw = src->qw}, d = { .qw = dst->qw };
union vec s = { .qw = src->qw }, d = { .qw = dst->qw };
for (unsigned i = 0; i < 4; i++) {
uint32_t res = ((int16_t)d.u16[i] * (int16_t)s.u16[i]);
// Bits 31..16 of the product become the new lane value.
d.u16[i] = ((res >> 16) & 0xffff);
}
dst->qw = d.qw;
}
void vec_mull64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
union vec s = { .qw = src->qw}, d = { .qw = dst->qw };
union vec s = { .qw = src->qw }, d = { .qw = dst->qw };
for (int i = 0; i < 4; i++) {
d.u16[i] = (uint16_t)(d.u16[i] * s.u16[i]);
}
Expand All @@ -229,14 +158,14 @@ void vec_unpackl_dq64(NO_CPU, const union mm_reg *src, union mm_reg *dst) {
}

// Rearranges the four 16-bit words of src into dst according to `encoding`.
//
// `encoding` is read as four 2-bit fields, lowest field first: field i selects
// which source word (0-3) is copied into destination word i. All four
// destination words are overwritten.
//
// NOTE(review): the two adjacent initializer lines below look like the
// before/after forms of a single line from a diff view (they differ only by a
// space before `}`); presumably only the second exists in the real file.
void vec_shuffle_w64(NO_CPU, const union mm_reg *src, union mm_reg *dst, uint8_t encoding) {
union vec s = { .qw = src->qw}, d = { .qw = dst->qw };
union vec s = { .qw = src->qw }, d = { .qw = dst->qw };
for (unsigned i = 0; i < 4; i++)
d.u16[i] = s.u16[(encoding >> (2 * i)) % 4];
dst->qw = d.qw;
}

void vec_movmask_b64(NO_CPU, const union mm_reg *src, uint32_t *dst) {
union vec s = { .qw = src->qw};
union vec s = { .qw = src->qw };
*dst = 0;
for (unsigned i = 0; i < array_size(s.u8); i++) {
if (s.u8[i] & (1 << 7))
Expand All @@ -245,7 +174,7 @@ void vec_movmask_b64(NO_CPU, const union mm_reg *src, uint32_t *dst) {
}

// Replaces one 16-bit word of dst with the low 16 bits of *src.
//
// `index` is reduced modulo 4, so only its two low bits choose the word; the
// other three words of dst are preserved.
//
// NOTE(review): the two adjacent initializer lines below look like the
// before/after forms of a single line from a diff view (they differ only by a
// space before `}`); presumably only the second exists in the real file.
void vec_insert_w64(NO_CPU, const uint32_t *src, union mm_reg *dst, uint8_t index) {
union vec d = { .qw = dst->qw};
union vec d = { .qw = dst->qw };
d.u16[index % 4] = (uint16_t)*src;
dst->qw = d.qw;
}
Loading

0 comments on commit f8a41ec

Please sign in to comment.