Skip to content

Commit

Permalink
Add Intel encodings for popcnt.
Browse files Browse the repository at this point in the history
Change the result type for the bit-counting instructions from a fixed i8
to the iB type variable which is the type of the input. This matches the
convention in WebAssembly, and at least Intel's instructions will set a
full register's worth of count result, even if it is always <= 64.

Duplicate the Intel 'ur' encoding recipe into 'umr' and 'urm' variants
corresponding to the RM and MR encoding variants. The difference is
which register is encoded as 'reg' and which is 'r/m' in the ModR/M
byte. A 'mov' register copy uses the MR variant, a unary popcnt uses the
RM variant.
  • Loading branch information
Jakob Stoklund Olesen committed Jul 12, 2017
1 parent 5615e4a commit b6f2f0d
Show file tree
Hide file tree
Showing 7 changed files with 79 additions and 14 deletions.
7 changes: 7 additions & 0 deletions filetests/isa/intel/binary32.cton
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,13 @@ ebb0:
; asm: movsbl -50000(%esi), %edx
[-,%rdx] v129 = sload8.i32 v2-50000 ; bin: 0f be 96 ffff3cb0

; Bit-counting instructions.

; asm: popcntl %esi, %ecx
[-,%rcx] v200 = popcnt v2 ; bin: f3 0f b8 ce
; asm: popcntl %ecx, %esi
[-,%rsi] v201 = popcnt v1 ; bin: f3 0f b8 f1

; asm: call foo
call fn0() ; bin: e8 PCRel4(fn0) 00000000

Expand Down
18 changes: 18 additions & 0 deletions filetests/isa/intel/binary64.cton
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ ebb0:
; asm: movq %rcx, %r10
[-,%r10] v112 = copy v1 ; bin: 49 89 ca

; Bit-counting instructions.

; asm: popcntq %rsi, %rcx
[-,%rcx] v200 = popcnt v2 ; bin: f3 48 0f b8 ce
; asm: popcntq %r10, %rsi
[-,%rsi] v201 = popcnt v3 ; bin: f3 49 0f b8 f2
; asm: popcntq %rcx, %r10
[-,%r10] v202 = popcnt v1 ; bin: f3 4c 0f b8 d1

return ; bin: c3
}

Expand Down Expand Up @@ -290,5 +299,14 @@ ebb0:
; asm: movl %ecx, %r10d
[-,%r10] v112 = copy v1 ; bin: 41 89 ca

; Bit-counting instructions.

; asm: popcntl %esi, %ecx
[-,%rcx] v200 = popcnt v2 ; bin: f3 40 0f b8 ce
; asm: popcntl %r10d, %esi
[-,%rsi] v201 = popcnt v3 ; bin: f3 41 0f b8 f2
; asm: popcntl %ecx, %r10d
[-,%r10] v202 = popcnt v1 ; bin: f3 44 0f b8 d1

return ; bin: c3
}
7 changes: 6 additions & 1 deletion filetests/wasm/i32-arith.cton
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,12 @@ ebb0:

; function %i32_clz(i32) -> i32
; function %i32_ctz(i32) -> i32
; function %i32_popcnt(i32) -> i32

function %i32_popcnt(i32) -> i32 {
ebb0(v0: i32):
v1 = popcnt v0
return v1
}

; Binary operations.

Expand Down
4 changes: 2 additions & 2 deletions lib/cretonne/meta/base/instructions.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from cdsl.operands import Operand, VARIABLE_ARGS
from cdsl.typevar import TypeVar
from cdsl.instructions import Instruction, InstructionGroup
from base.types import i8, f32, f64, b1
from base.types import f32, f64, b1
from base.immediates import imm64, uimm8, ieee32, ieee64, offset32, uoffset32
from base.immediates import intcc, floatcc, memflags, regunit
from base import entities
Expand Down Expand Up @@ -1050,7 +1050,7 @@
#

x = Operand('x', iB)
a = Operand('a', i8)
a = Operand('a', iB)

clz = Instruction(
'clz', r"""
Expand Down
14 changes: 10 additions & 4 deletions lib/cretonne/meta/isa/intel/encodings.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@
# default. Otherwise reg-alloc would never use r8 and up.
I64.enc(inst.i32, *r.rr(opc))

I32.enc(base.copy.i32, *r.ur(0x89))
I64.enc(base.copy.i64, *r.ur.rex(0x89, w=1))
I64.enc(base.copy.i32, *r.ur.rex(0x89))
I64.enc(base.copy.i32, *r.ur(0x89))
I32.enc(base.copy.i32, *r.umr(0x89))
I64.enc(base.copy.i64, *r.umr.rex(0x89, w=1))
I64.enc(base.copy.i32, *r.umr.rex(0x89))
I64.enc(base.copy.i32, *r.umr(0x89))

I32.enc(base.regmove.i32, *r.rmov(0x89))
I64.enc(base.regmove.i64, *r.rmov.rex(0x89, w=1))
Expand Down Expand Up @@ -80,6 +80,12 @@
I64.enc(inst.i32.i32, *r.rc.rex(0xd3, rrr=rrr))
I64.enc(inst.i32.i32, *r.rc(0xd3, rrr=rrr))

# Population count.
I32.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8))
I64.enc(base.popcnt.i64, *r.urm.rex(0xf3, 0x0f, 0xb8, w=1))
I64.enc(base.popcnt.i32, *r.urm.rex(0xf3, 0x0f, 0xb8))
I64.enc(base.popcnt.i32, *r.urm(0xf3, 0x0f, 0xb8))

# Loads and stores.
I32.enc(base.store.i32.i32, *r.st(0x89))
I32.enc(base.store.i32.i32, *r.stDisp8(0x89))
Expand Down
15 changes: 12 additions & 3 deletions lib/cretonne/meta/isa/intel/recipes.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,14 +198,23 @@ def rex(self, *ops, **kwargs):
''')

# XX /r, but for a unary operator with separate input/output register, like
# copies.
ur = TailRecipe(
'ur', Unary, size=1, ins=GPR, outs=GPR,
# copies. MR form.
umr = TailRecipe(
'umr', Unary, size=1, ins=GPR, outs=GPR,
emit='''
PUT_OP(bits, rex2(out_reg0, in_reg0), sink);
modrm_rr(out_reg0, in_reg0, sink);
''')

# XX /r, but for a unary operator with separate input/output register.
# RM form.
urm = TailRecipe(
'urm', Unary, size=1, ins=GPR, outs=GPR,
emit='''
PUT_OP(bits, rex2(in_reg0, out_reg0), sink);
modrm_rr(in_reg0, out_reg0, sink);
''')

# XX /r, for regmove instructions.
rmov = TailRecipe(
'ur', RegMove, size=1, ins=GPR, outs=(),
Expand Down
28 changes: 24 additions & 4 deletions lib/cretonne/src/isa/intel/binemit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ fn rex_prefix<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
// Emit a single-byte opcode with no REX prefix.
//
// `bits` carries the recipe's encoding bits: the low byte is the opcode written
// to `sink`, and every bit selected by the 0x8f00 mask must be clear for the
// Op1 family. `rex` is the REX byte derived from the register operands; since
// this form emits no REX prefix it must equal `BASE_REX`.
fn put_op1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x8f00, 0, "Invalid encoding bits for Op1*");
    // A single check with the family-specific message; a second, generic copy
    // of this assertion would only shadow it.
    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op1 encoding");
    sink.put1(bits as u8);
}

Expand All @@ -71,17 +71,37 @@ fn put_rexop1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
// Emit two-byte opcode: 0F XX
//
// `bits` carries the recipe's encoding bits: under the 0x8f00 mask only 0x0400
// may be set (the marker for the Op2 family), and the low byte is the `XX`
// opcode byte that follows the 0x0f escape. `rex` is the REX byte derived from
// the register operands; since this form emits no REX prefix it must equal
// `BASE_REX`.
fn put_op2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    debug_assert_eq!(bits & 0x8f00, 0x0400, "Invalid encoding bits for Op2*");
    // A single check with the family-specific message; a second, generic copy
    // of this assertion would only shadow it.
    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Op2 encoding");
    sink.put1(0x0f);
    sink.put1(bits as u8);
}

// Emit single-byte opcode with mandatory prefix.
//
// `bits` carries the recipe's encoding bits: bits 8-9 (`pp`) select the
// mandatory prefix from the `PREFIX` table (entry `pp - 1`), the bits under the
// 0x8c00 mask must be clear for the Mp1 family, and the low byte is the opcode.
// `rex` is the REX byte derived from the register operands; since this form
// emits no REX prefix it must equal `BASE_REX`.
//
// NOTE(review): `pp == 0` makes `pp - 1` underflow / index out of range, so the
// recipe is presumably required to always select a prefix -- confirm.
fn put_mp1<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    // The 0x8c00 mask is a superset of 0x0c00, so this one check covers both.
    debug_assert_eq!(bits & 0x8c00, 0, "Invalid encoding bits for Mp1*");
    let pp = (bits >> 8) & 3;
    sink.put1(PREFIX[(pp - 1) as usize]);
    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp1 encoding");
    sink.put1(bits as u8);
}

// Emit a mandatory prefix followed by a two-byte opcode (0F XX), without REX.
//
// Layout of `bits` as used here: bits 8-9 pick the mandatory prefix (entry
// `selector - 1` of `PREFIX`), under the 0x8c00 mask only 0x0400 may be set,
// and the low byte is the `XX` opcode byte. `rex` must equal `BASE_REX`
// because no REX prefix is written.
fn put_mp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    let family_bits = bits & 0x8c00;
    debug_assert_eq!(family_bits, 0x0400, "Invalid encoding bits for Mp2*");

    // Mandatory prefix goes first.
    let selector = (bits >> 8) & 3;
    let prefix_byte = PREFIX[(selector - 1) as usize];
    sink.put1(prefix_byte);

    debug_assert_eq!(rex, BASE_REX, "Invalid registers for REX-less Mp2 encoding");

    // 0x0f escape, then the opcode byte taken from the low 8 bits.
    let opcode = bits as u8;
    sink.put1(0x0f);
    sink.put1(opcode);
}

// Emit two-byte opcode (0F XX) with mandatory prefix and REX.
//
// `bits` carries the recipe's encoding bits: bits 8-9 (`pp`) select the
// mandatory prefix from the `PREFIX` table (entry `pp - 1`), under the 0x0c00
// mask only 0x0400 may be set, and the low byte is the `XX` opcode byte. Unlike
// the REX-less emitters, bit 0x8000 is not checked here.
// NOTE(review): bit 0x8000 presumably carries REX.W and is consumed by
// `rex_prefix` -- confirm.
//
// Byte order written to `sink`: mandatory prefix, REX prefix (via
// `rex_prefix`), 0x0f escape, opcode.
fn put_rexmp2<CS: CodeSink + ?Sized>(bits: u16, rex: u8, sink: &mut CS) {
    // Name the RexMp2 family in the message so a failure points at this
    // emitter rather than at the REX-less `put_mp2`.
    debug_assert_eq!(bits & 0x0c00, 0x0400, "Invalid encoding bits for RexMp2*");
    let pp = (bits >> 8) & 3;
    sink.put1(PREFIX[(pp - 1) as usize]);
    rex_prefix(bits, rex, sink);
    sink.put1(0x0f);
    sink.put1(bits as u8);
}

Expand Down

0 comments on commit b6f2f0d

Please sign in to comment.