tcg: Support arbitrary size + alignment
Previously we allowed fully unaligned operations, but not operations
that are aligned but with less alignment than the operation size.

In addition, arm32, ia64, mips, and sparc had been omitted from the
previous overalignment patch, which would have led to that alignment
being enforced.

Signed-off-by: Richard Henderson <rth@twiddle.net>
rth7680 committed Sep 16, 2016
1 parent ebc231d commit 85aa808
Showing 10 changed files with 128 additions and 112 deletions.
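The hunks below all rely on the same change of contract visible in the diff: get_alignment_bits() moves from an int (where a negative value meant "unaligned access allowed") to an unsigned count of alignment bits, with 0 meaning no alignment requirement. A minimal sketch of the resulting softmmu-side check, written as illustrative C rather than the actual tcg.h/softmmu_template.h code:

```c
/* Illustrative sketch only -- not the QEMU implementation. */
#include <stdbool.h>
#include <stdint.h>

/* The check the softmmu helpers now perform unconditionally: with an
 * unsigned a_bits, the old "a_bits > 0 &&" guard is redundant, because
 * a_bits == 0 yields a mask of 0 and can never trap. */
static bool access_is_misaligned(uint64_t addr, unsigned a_bits)
{
    return (addr & (((uint64_t)1 << a_bits) - 1)) != 0;
}

/* Example: a 4-byte access that the guest insn only requires to be
 * 2-byte aligned (a_bits = 1, i.e. "arbitrary size + alignment"). */
static bool example(void)
{
    return access_is_misaligned(0x1002, 1)      /* false: 2-byte aligned */
        || access_is_misaligned(0x1003, 1);     /* true:  odd address    */
}
```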
16 changes: 8 additions & 8 deletions softmmu_template.h
@@ -146,14 +146,14 @@ WORD_TYPE helper_le_ld_name(CPUArchState *env, target_ulong addr,
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
int a_bits = get_alignment_bits(get_memop(oi));
unsigned a_bits = get_alignment_bits(get_memop(oi));
uintptr_t haddr;
DATA_TYPE res;

/* Adjust the given return address. */
retaddr -= GETPC_ADJ;

if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
mmu_idx, retaddr);
}
@@ -220,14 +220,14 @@ WORD_TYPE helper_be_ld_name(CPUArchState *env, target_ulong addr,
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].ADDR_READ;
int a_bits = get_alignment_bits(get_memop(oi));
unsigned a_bits = get_alignment_bits(get_memop(oi));
uintptr_t haddr;
DATA_TYPE res;

/* Adjust the given return address. */
retaddr -= GETPC_ADJ;

if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, READ_ACCESS_TYPE,
mmu_idx, retaddr);
}
@@ -331,13 +331,13 @@ void helper_le_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
int a_bits = get_alignment_bits(get_memop(oi));
unsigned a_bits = get_alignment_bits(get_memop(oi));
uintptr_t haddr;

/* Adjust the given return address. */
retaddr -= GETPC_ADJ;

if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
mmu_idx, retaddr);
}
@@ -414,13 +414,13 @@ void helper_be_st_name(CPUArchState *env, target_ulong addr, DATA_TYPE val,
unsigned mmu_idx = get_mmuidx(oi);
int index = (addr >> TARGET_PAGE_BITS) & (CPU_TLB_SIZE - 1);
target_ulong tlb_addr = env->tlb_table[mmu_idx][index].addr_write;
int a_bits = get_alignment_bits(get_memop(oi));
unsigned a_bits = get_alignment_bits(get_memop(oi));
uintptr_t haddr;

/* Adjust the given return address. */
retaddr -= GETPC_ADJ;

if (a_bits > 0 && (addr & ((1 << a_bits) - 1)) != 0) {
if (addr & ((1 << a_bits) - 1)) {
cpu_unaligned_access(ENV_GET_CPU(env), addr, MMU_DATA_STORE,
mmu_idx, retaddr);
}
13 changes: 7 additions & 6 deletions tcg/aarch64/tcg-target.inc.c
@@ -1081,23 +1081,24 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addr_reg, TCGMemOp opc,
int tlb_offset = is_read ?
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
int a_bits = get_alignment_bits(opc);
unsigned a_bits = get_alignment_bits(opc);
unsigned s_bits = opc & MO_SIZE;
unsigned a_mask = (1u << a_bits) - 1;
unsigned s_mask = (1u << s_bits) - 1;
TCGReg base = TCG_AREG0, x3;
uint64_t tlb_mask;

/* For aligned accesses, we check the first byte and include the alignment
bits within the address. For unaligned access, we check that we don't
cross pages using the address of the last byte of the access. */
if (a_bits >= 0) {
/* A byte access or an alignment check required */
tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
if (a_bits >= s_bits) {
x3 = addr_reg;
} else {
tcg_out_insn(s, 3401, ADDI, TARGET_LONG_BITS == 64,
TCG_REG_X3, addr_reg, (1 << (opc & MO_SIZE)) - 1);
tlb_mask = TARGET_PAGE_MASK;
TCG_REG_X3, addr_reg, s_mask - a_mask);
x3 = TCG_REG_X3;
}
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

/* Extract the TLB index from the address into X0.
X0<CPU_TLB_BITS:0> =
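The aarch64 hunk above, like the i386 and s390 hunks further down, hoists tlb_mask out of the old if/else and builds the TLB comparator in one of two ways: the address itself when the requested alignment already covers the access size, or the address advanced by s_mask - a_mask so that a page-crossing access lands on the next page and fails the compare. A plain-C sketch of that computation, with an assumed TARGET_PAGE_BITS and illustrative names rather than the backend's emit functions:

```c
#include <stdint.h>

#define TARGET_PAGE_BITS 12   /* assumed for the sketch; target-dependent */
#define TARGET_PAGE_MASK ((uint64_t)-1 << TARGET_PAGE_BITS)

/* Plain-C model of the comparator value the emitted add + mask produce. */
static uint64_t tlb_comparator(uint64_t addr, unsigned a_bits, unsigned s_bits)
{
    uint64_t a_mask = (1u << a_bits) - 1;
    uint64_t s_mask = (1u << s_bits) - 1;
    uint64_t x = addr;

    if (a_bits < s_bits) {
        /* Under-aligned access: advance to the last alignment granule the
         * access touches, so crossing a page changes the page number and
         * the TLB compare fails. */
        x += s_mask - a_mask;
    }
    /* Keep the page number plus the alignment bits; a misaligned address
     * leaves low bits set and likewise fails the compare against the
     * page-aligned tag stored in the TLB entry. */
    return x & (TARGET_PAGE_MASK | a_mask);
}
```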
19 changes: 12 additions & 7 deletions tcg/arm/tcg-target.inc.c
@@ -1168,14 +1168,16 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */

static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
TCGMemOp s_bits, int mem_index, bool is_load)
TCGMemOp opc, int mem_index, bool is_load)
{
TCGReg base = TCG_AREG0;
int cmp_off =
(is_load
? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);

/* Should generate something like the following:
* shr tmp, addrlo, #TARGET_PAGE_BITS (1)
@@ -1216,10 +1218,13 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
}
}

/* Check alignment. */
if (s_bits) {
tcg_out_dat_imm(s, COND_AL, ARITH_TST,
0, addrlo, (1 << s_bits) - 1);
/* Check alignment. We don't support inline unaligned accesses,
but we can easily support overalignment checks. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
if (a_bits) {
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, (1 << a_bits) - 1);
}

/* Load the tlb addend. */
@@ -1499,7 +1504,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)

#ifdef CONFIG_SOFTMMU
mem_index = get_mmuidx(oi);
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 1);
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1);

/* This a conditional BL only to load a pointer within this opcode into LR
for the slow path. We will not be using the value for a tail call. */
@@ -1630,7 +1635,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)

#ifdef CONFIG_SOFTMMU
mem_index = get_mmuidx(oi);
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc & MO_SIZE, mem_index, 0);
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0);

tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);

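The arm hunk above, and the ia64 and mips hunks below, share one policy: these backends have no inline unaligned-access path, so an under-aligned request is promoted to the full access size before the check is generated. Arm then tests the resulting low bits directly with TST, while ia64 and mips keep those bits in the TLB comparator so a misaligned address simply fails the compare. A hedged sketch of just the promotion, with an illustrative helper name rather than QEMU code:

```c
/* Illustrative only: the low-address-bit mask that must be clear for the
 * fast path on backends without inline unaligned accesses. */
static unsigned low_bits_that_must_be_zero(unsigned a_bits, unsigned s_bits)
{
    if (a_bits < s_bits) {
        a_bits = s_bits;    /* promote: check full natural alignment */
    }
    return (1u << a_bits) - 1;
}
```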
19 changes: 10 additions & 9 deletions tcg/i386/tcg-target.inc.c
@@ -1202,7 +1202,10 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
TCGType ttype = TCG_TYPE_I32;
TCGType tlbtype = TCG_TYPE_I32;
int trexw = 0, hrexw = 0, tlbrexw = 0;
int a_bits = get_alignment_bits(opc);
unsigned a_bits = get_alignment_bits(opc);
unsigned s_bits = opc & MO_SIZE;
unsigned a_mask = (1 << a_bits) - 1;
unsigned s_mask = (1 << s_bits) - 1;
target_ulong tlb_mask;

if (TCG_TARGET_REG_BITS == 64) {
@@ -1220,17 +1223,15 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
}

tcg_out_mov(s, tlbtype, r0, addrlo);
if (a_bits >= 0) {
/* A byte access or an alignment check required */
/* If the required alignment is at least as large as the access, simply
copy the address and mask. For lesser alignments, check that we don't
cross pages for the complete access. */
if (a_bits >= s_bits) {
tcg_out_mov(s, ttype, r1, addrlo);
tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
} else {
/* For unaligned access check that we don't cross pages using
the page address of the last byte. */
tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo,
(1 << (opc & MO_SIZE)) - 1);
tlb_mask = TARGET_PAGE_MASK;
tcg_out_modrm_offset(s, OPC_LEA + trexw, r1, addrlo, s_mask - a_mask);
}
tlb_mask = TARGET_PAGE_MASK | a_mask;

tcg_out_shifti(s, SHIFT_SHR + tlbrexw, r0,
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
22 changes: 15 additions & 7 deletions tcg/ia64/tcg-target.inc.c
@@ -1496,10 +1496,18 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
R1, R3 are clobbered, leaving R56 free for...
BSWAP_1, BSWAP_2 and I-slot insns for swapping data for store. */
static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
TCGMemOp s_bits, int off_rw, int off_add,
TCGMemOp opc, int off_rw, int off_add,
uint64_t bswap1, uint64_t bswap2)
{
/*
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);

/* We don't support unaligned accesses, but overalignment is easy. */
if (a_bits < s_bits) {
a_bits = s_bits;
}

/*
.mii
mov r2 = off_rw
extr.u r3 = addr_reg, ... # extract tlb page
@@ -1521,7 +1529,7 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
cmp.eq p6, p7 = r3, r58
nop
;;
*/
*/
tcg_out_bundle(s, miI,
tcg_opc_movi_a(TCG_REG_P0, TCG_REG_R2, off_rw),
tcg_opc_i11(TCG_REG_P0, OPC_EXTR_U_I11, TCG_REG_R3,
@@ -1536,8 +1544,8 @@ static inline void tcg_out_qemu_tlb(TCGContext *s, TCGReg addr_reg,
TCG_REG_R3, 63 - CPU_TLB_ENTRY_BITS,
63 - CPU_TLB_ENTRY_BITS),
tcg_opc_i14(TCG_REG_P0, OPC_DEP_I14, TCG_REG_R1, 0,
TCG_REG_R57, 63 - s_bits,
TARGET_PAGE_BITS - s_bits - 1));
TCG_REG_R57, 63 - a_bits,
TARGET_PAGE_BITS - a_bits - 1));
tcg_out_bundle(s, MmI,
tcg_opc_a1 (TCG_REG_P0, OPC_ADD_A1,
TCG_REG_R2, TCG_REG_R2, TCG_REG_R3),
@@ -1661,7 +1669,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args)
s_bits = opc & MO_SIZE;

/* Read the TLB entry */
tcg_out_qemu_tlb(s, addr_reg, s_bits,
tcg_out_qemu_tlb(s, addr_reg, opc,
offsetof(CPUArchState, tlb_table[mem_index][0].addr_read),
offsetof(CPUArchState, tlb_table[mem_index][0].addend),
INSN_NOP_I, INSN_NOP_I);
@@ -1739,7 +1747,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const TCGArg *args)
pre1 = tcg_opc_ext_i(TCG_REG_P0, opc, TCG_REG_R58, data_reg);
}

tcg_out_qemu_tlb(s, addr_reg, s_bits,
tcg_out_qemu_tlb(s, addr_reg, opc,
offsetof(CPUArchState, tlb_table[mem_index][0].addr_write),
offsetof(CPUArchState, tlb_table[mem_index][0].addend),
pre1, pre2);
11 changes: 9 additions & 2 deletions tcg/mips/tcg-target.inc.c
@@ -1040,7 +1040,9 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
TCGReg addrh, TCGMemOpIdx oi,
tcg_insn_unit *label_ptr[2], bool is_load)
{
TCGMemOp s_bits = get_memop(oi) & MO_SIZE;
TCGMemOp opc = get_memop(oi);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
int mem_index = get_mmuidx(oi);
int cmp_off
= (is_load
@@ -1071,10 +1073,15 @@ static void tcg_out_tlb_load(TCGContext *s, TCGReg base, TCGReg addrl,
tcg_out_opc_imm(s, OPC_LW, TCG_TMP0, TCG_REG_A0,
cmp_off + (TARGET_LONG_BITS == 64 ? LO_OFF : 0));

/* We don't currently support unaligned accesses.
We could do so with mips32r6. */
if (a_bits < s_bits) {
a_bits = s_bits;
}
/* Mask the page bits, keeping the alignment bits to compare against.
In between on 32-bit targets, load the tlb addend for the fast path. */
tcg_out_movi(s, TCG_TYPE_I32, TCG_TMP1,
TARGET_PAGE_MASK | ((1 << s_bits) - 1));
TARGET_PAGE_MASK | ((1 << a_bits) - 1));
if (TARGET_LONG_BITS == 32) {
tcg_out_opc_imm(s, OPC_LW, TCG_REG_A0, TCG_REG_A0, add_off);
}
58 changes: 31 additions & 27 deletions tcg/ppc/tcg-target.inc.c
@@ -1404,8 +1404,8 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
TCGReg base = TCG_AREG0;
TCGMemOp s_bits = opc & MO_SIZE;
int a_bits = get_alignment_bits(opc);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);

/* Extract the page index, shifted into place for tlb index. */
if (TCG_TARGET_REG_BITS == 64) {
@@ -1458,39 +1458,43 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGMemOp opc,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_REG_R3, add_off);

/* Clear the non-page, non-alignment bits from the address */
if (TCG_TARGET_REG_BITS == 32 || TARGET_LONG_BITS == 32) {
/* We don't support unaligned accesses on 32-bits, preserve
* the bottom bits and thus trigger a comparison failure on
* unaligned accesses
if (TCG_TARGET_REG_BITS == 32) {
/* We don't support unaligned accesses on 32-bits.
* Preserve the bottom bits and thus trigger a comparison
* failure on unaligned accesses.
*/
if (a_bits < 0) {
if (a_bits < s_bits) {
a_bits = s_bits;
}
tcg_out_rlw(s, RLWINM, TCG_REG_R0, addrlo, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
} else if (a_bits) {
/* More than byte access, we need to handle alignment */
if (a_bits > 0) {
/* Alignment required by the front-end, same as 32-bits */
tcg_out_rld(s, RLDICL, TCG_REG_R0, addrlo,
} else {
TCGReg t = addrlo;

/* If the access is unaligned, we need to make sure we fail if we
* cross a page boundary. The trick is to add the access size-1
* to the address before masking the low bits. That will make the
* address overflow to the next page if we cross a page boundary,
* which will then force a mismatch of the TLB compare.
*/
if (a_bits < s_bits) {
unsigned a_mask = (1 << a_bits) - 1;
unsigned s_mask = (1 << s_bits) - 1;
tcg_out32(s, ADDI | TAI(TCG_REG_R0, t, s_mask - a_mask));
t = TCG_REG_R0;
}

/* Mask the address for the requested alignment. */
if (TARGET_LONG_BITS == 32) {
tcg_out_rlw(s, RLWINM, TCG_REG_R0, t, 0,
(32 - a_bits) & 31, 31 - TARGET_PAGE_BITS);
} else if (a_bits == 0) {
tcg_out_rld(s, RLDICR, TCG_REG_R0, t, 0, 63 - TARGET_PAGE_BITS);
} else {
tcg_out_rld(s, RLDICL, TCG_REG_R0, t,
64 - TARGET_PAGE_BITS, TARGET_PAGE_BITS - a_bits);
tcg_out_rld(s, RLDICL, TCG_REG_R0, TCG_REG_R0, TARGET_PAGE_BITS, 0);
} else {
/* We support unaligned accesses, we need to make sure we fail
* if we cross a page boundary. The trick is to add the
* access_size-1 to the address before masking the low bits.
* That will make the address overflow to the next page if we
* cross a page boundary which will then force a mismatch of
* the TLB compare since the next page cannot possibly be in
* the same TLB index.
*/
tcg_out32(s, ADDI | TAI(TCG_REG_R0, addrlo, (1 << s_bits) - 1));
tcg_out_rld(s, RLDICR, TCG_REG_R0, TCG_REG_R0,
0, 63 - TARGET_PAGE_BITS);
}
} else {
/* Byte access, just chop off the bits below the page index */
tcg_out_rld(s, RLDICR, TCG_REG_R0, addrlo, 0, 63 - TARGET_PAGE_BITS);
}

if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
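The block comment added in the ppc hunk above makes the add-then-mask argument in prose; the same trick appears in the aarch64, i386 and s390 backends. A small self-contained check of the arithmetic, assuming 4 KiB pages:

```c
#include <assert.h>
#include <stdint.h>

#define TARGET_PAGE_BITS 12                       /* assumed 4 KiB pages */
#define TARGET_PAGE_MASK ((uint64_t)-1 << TARGET_PAGE_BITS)

/* 8-byte load (s_mask = 7) with only 2-byte alignment required (a_mask = 1).
 * Adding s_mask - a_mask pushes a page-crossing access onto the next page,
 * so the masked value no longer equals the page of the first byte and the
 * TLB compare falls back to the slow path. */
int main(void)
{
    uint64_t a_mask = 1, s_mask = 7;

    /* 0x1ffa..0x2001 crosses from page 0x1000 into 0x2000: compare fails. */
    assert(((0x1ffaULL + (s_mask - a_mask)) & (TARGET_PAGE_MASK | a_mask))
           == 0x2000);

    /* 0x1ff0..0x1ff7 stays on page 0x1000: compare still matches. */
    assert(((0x1ff0ULL + (s_mask - a_mask)) & (TARGET_PAGE_MASK | a_mask))
           == 0x1000);
    return 0;
}
```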
15 changes: 6 additions & 9 deletions tcg/s390/tcg-target.inc.c
@@ -1505,21 +1505,18 @@ QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
static TCGReg tcg_out_tlb_read(TCGContext* s, TCGReg addr_reg, TCGMemOp opc,
int mem_index, bool is_ld)
{
int a_bits = get_alignment_bits(opc);
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
unsigned s_mask = (1 << s_bits) - 1;
unsigned a_mask = (1 << a_bits) - 1;
int ofs, a_off;
uint64_t tlb_mask;

/* For aligned accesses, we check the first byte and include the alignment
bits within the address. For unaligned access, we check that we don't
cross pages using the address of the last byte of the access. */
if (a_bits >= 0) {
/* A byte access or an alignment check required */
a_off = 0;
tlb_mask = TARGET_PAGE_MASK | ((1 << a_bits) - 1);
} else {
a_off = (1 << (opc & MO_SIZE)) - 1;
tlb_mask = TARGET_PAGE_MASK;
}
a_off = (a_bits >= s_bits ? 0 : s_mask - a_mask);
tlb_mask = (uint64_t)TARGET_PAGE_MASK | a_mask;

if (facilities & FACILITY_GEN_INST_EXT) {
tcg_out_risbg(s, TCG_REG_R2, addr_reg,