drm/msm: Implement preemption for A5XX targets
Implement preemption for A5XX targets - this allows multiple
ringbuffers for different priorities, with automatic preemption
of a lower-priority ringbuffer if a higher-priority one is ready.

Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
Signed-off-by: Rob Clark <robdclark@gmail.com>
Jordan Crouse authored and robclark committed Oct 28, 2017
1 parent 4d87fc3 commit b1fc283
Showing 10 changed files with 599 additions and 20 deletions.
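
The scheduling model behind the change is straightforward: rings are ordered by priority (lower index = higher priority), and whenever work is submitted or retired the driver looks for the highest-priority ring that still has commands queued. A minimal sketch of that selection follows; the helper and field names are illustrative rather than copied from the patch, and the real version would take ring->lock around the pointer comparison:

	/* Sketch: pick the highest-priority ring with outstanding commands.
	 * Assumes ring 0 is the highest priority and that a ring is busy
	 * while the CP's read pointer (shadowed in memory) has not caught
	 * up with our cached write pointer.
	 */
	static struct msm_ringbuffer *get_next_ring(struct msm_gpu *gpu)
	{
		int i;

		for (i = 0; i < gpu->nr_rings; i++) {
			struct msm_ringbuffer *ring = gpu->rb[i];

			if (get_wptr(ring) != ring->memptrs->rptr)
				return ring;
		}

		return NULL;	/* everything is idle */
	}

If the winner differs from the ring the CP is currently executing, a preemption is triggered; otherwise the GPU keeps running where it is.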
1 change: 1 addition & 0 deletions drivers/gpu/drm/msm/Makefile
@@ -8,6 +8,7 @@ msm-y := \
adreno/a4xx_gpu.o \
adreno/a5xx_gpu.o \
adreno/a5xx_power.o \
adreno/a5xx_preempt.o \
hdmi/hdmi.o \
hdmi/hdmi_audio.o \
hdmi/hdmi_bridge.o \
176 changes: 171 additions & 5 deletions drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -113,13 +113,65 @@ static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
return ret;
}

static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
uint32_t wptr;
unsigned long flags;

spin_lock_irqsave(&ring->lock, flags);

/* Copy the shadow to the actual register */
ring->cur = ring->next;

/* Make sure to wrap wptr if we need to */
wptr = get_wptr(ring);

spin_unlock_irqrestore(&ring->lock, flags);

/* Make sure everything is posted before making a decision */
mb();

/* Update HW if this is the current ring and we are not in preempt */
if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
}
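
get_wptr() itself is not part of this hunk; given the "wrap" comment above, a plausible definition is the dword offset of the shadow pointer into the ring, wrapped at the ring size. A sketch under that assumption (MSM_GPU_RINGBUFFER_SZ being the ring size in bytes):

	/* Sketch: wptr is a dword index into the circular ringbuffer, so
	 * wrap the shadow pointer's offset at the buffer size. ->cur and
	 * ->start are u32 pointers, so the subtraction counts dwords and
	 * the byte size is shifted down to dwords to match.
	 */
	static uint32_t get_wptr(struct msm_ringbuffer *ring)
	{
		return (ring->cur - ring->start) % (MSM_GPU_RINGBUFFER_SZ >> 2);
	}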

static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
struct msm_file_private *ctx)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
struct msm_drm_private *priv = gpu->dev->dev_private;
struct msm_ringbuffer *ring = submit->ring;
unsigned int i, ibs = 0;

OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x02);

/* Turn off protected mode to write to special registers */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 0);

/* Set the save preemption record for the ring/command */
OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));

/* Turn back on protected mode */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);

/* Enable local preemption for finegrain preemption */
OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
OUT_RING(ring, 0x02);

/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
OUT_RING(ring, 0x02);

/* Submit the commands */
for (i = 0; i < submit->nr_cmds; i++) {
switch (submit->cmd[i].type) {
case MSM_SUBMIT_CMD_IB_TARGET_BUF:
@@ -137,16 +189,54 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
}
}

/*
* Write the render mode to NULL (0) to indicate to the CP that the IBs
* are done rendering - otherwise a lucky preemption would start
* replaying from the last checkpoint
*/
OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
OUT_RING(ring, 0);
OUT_RING(ring, 0);

/* Turn off IB level preemptions */
OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
OUT_RING(ring, 0x01);

/* Write the fence to the scratch register */
OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
OUT_RING(ring, submit->seqno);

/*
* Execute a CACHE_FLUSH_TS event. This will ensure that the
* timestamp is written to the memory and then triggers the interrupt
*/
OUT_PKT7(ring, CP_EVENT_WRITE, 4);
OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
OUT_RING(ring, submit->seqno);

-	gpu->funcs->flush(gpu, ring);
/* Yield the floor on command completion */
OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
/*
* If dword[2:1] are non zero, they specify an address for the CP to
* write the value of dword[3] to on preemption complete. Write 0 to
* skip the write
*/
OUT_RING(ring, 0x00);
OUT_RING(ring, 0x00);
/* Data value - not used if the address above is 0 */
OUT_RING(ring, 0x01);
/* Set bit 0 to trigger an interrupt on preempt complete */
OUT_RING(ring, 0x01);

a5xx_flush(gpu, ring);

/* Check to see if we need to start preemption */
a5xx_preempt_trigger(gpu);
}
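
a5xx_preempt_trigger() lives in the new a5xx_preempt.c, which is not rendered in this view. Its job, per the call sites here and in the IRQ handler below, is to compare the active ring against the best runnable ring and kick the CP if they differ. The following is a reconstructed sketch, not a verbatim copy of the patch: the preempt-state helpers and PREEMPT_* states are assumptions inferred from the registers programmed elsewhere in this diff.

	/* Reconstructed sketch of the trigger path */
	void a5xx_preempt_trigger(struct msm_gpu *gpu)
	{
		struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
		struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
		struct msm_ringbuffer *ring;

		if (gpu->nr_rings == 1)
			return;		/* preemption disabled */

		/* Only one preemption can be in flight at a time */
		if (!try_preempt_state(a5xx_gpu, PREEMPT_NONE, PREEMPT_START))
			return;

		ring = get_next_ring(gpu);

		/* Nothing better to run, or already running it: stand down */
		if (!ring || ring == a5xx_gpu->cur_ring) {
			set_preempt_state(a5xx_gpu, PREEMPT_NONE);
			return;
		}

		a5xx_gpu->next_ring = ring;

		/* Point the CP at the target ring's saved context... */
		gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
			REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
			a5xx_gpu->preempt_iova[ring->id]);

		/* ...and ask it to switch at the next yield point */
		set_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED);
		gpu_write(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL, 1);
	}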

static const struct {
@@ -297,6 +387,50 @@ static int a5xx_me_init(struct msm_gpu *gpu)
return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}

static int a5xx_preempt_start(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
struct msm_ringbuffer *ring = gpu->rb[0];

if (gpu->nr_rings == 1)
return 0;

/* Turn off protected mode to write to special registers */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 0);

/* Set the save preemption record for the ring/command */
OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

/* Turn back on protected mode */
OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
OUT_RING(ring, 1);

OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
OUT_RING(ring, 0x00);

OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
OUT_RING(ring, 0x01);

OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
OUT_RING(ring, 0x01);

/* Yield the floor on command completion */
OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
OUT_RING(ring, 0x00);
OUT_RING(ring, 0x00);
OUT_RING(ring, 0x01);
OUT_RING(ring, 0x01);

gpu->funcs->flush(gpu, ring);

return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}


static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
const struct firmware *fw, u64 *iova)
{
@@ -412,6 +546,7 @@ static int a5xx_zap_shader_init(struct msm_gpu *gpu)
A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
A5XX_RBBM_INT_0_MASK_CP_SW | \
A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
@@ -556,6 +691,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
if (ret)
return ret;

a5xx_preempt_hw_init(gpu);

a5xx_gpmu_ucode_init(gpu);

ret = a5xx_ucode_init(gpu);
@@ -610,6 +747,9 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
}

/* Last step - yield the ringbuffer */
a5xx_preempt_start(gpu);

return 0;
}

@@ -640,6 +780,8 @@ static void a5xx_destroy(struct msm_gpu *gpu)

DBG("%s", gpu->name);

a5xx_preempt_fini(gpu);

if (a5xx_gpu->pm4_bo) {
if (a5xx_gpu->pm4_iova)
msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
@@ -677,6 +819,14 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu)

bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

if (ring != a5xx_gpu->cur_ring) {
WARN(1, "Tried to idle a non-current ringbuffer\n");
return false;
}

/* wait for CP to drain ringbuffer: */
if (!adreno_idle(gpu, ring))
return false;
@@ -871,8 +1021,13 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
a5xx_gpmu_err_irq(gpu);

-	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS)
if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
a5xx_preempt_trigger(gpu);
msm_gpu_retire(gpu);
}

if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
a5xx_preempt_irq(gpu);

return IRQ_HANDLED;
}
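
The newly unmasked A5XX_RBBM_INT_0_MASK_CP_SW interrupt is the CP's "context switch done" signal, requested by bit 0 of the CP_CONTEXT_SWITCH_YIELD packet above. a5xx_preempt_irq() is also in the unshown a5xx_preempt.c; roughly, it promotes next_ring to cur_ring and restarts the ring from its shadow write pointer. A reconstructed sketch under those assumptions:

	/* Reconstructed sketch of the preempt-complete handler */
	void a5xx_preempt_irq(struct msm_gpu *gpu)
	{
		struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
		struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

		if (!try_preempt_state(a5xx_gpu, PREEMPT_TRIGGERED, PREEMPT_PENDING))
			return;

		/* A still-set CNTL would mean the CP never actually switched */
		if (gpu_read(gpu, REG_A5XX_CP_CONTEXT_SWITCH_CNTL)) {
			set_preempt_state(a5xx_gpu, PREEMPT_FAULTED);
			return;	/* recovery would be scheduled here */
		}

		a5xx_gpu->cur_ring = a5xx_gpu->next_ring;
		a5xx_gpu->next_ring = NULL;

		/* Resume the new ring from its shadow write pointer */
		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, get_wptr(a5xx_gpu->cur_ring));

		set_preempt_state(a5xx_gpu, PREEMPT_NONE);
	}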
@@ -1002,6 +1157,14 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
}
#endif

static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
{
struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

return a5xx_gpu->cur_ring;
}

static const struct adreno_gpu_funcs funcs = {
.base = {
.get_param = adreno_get_param,
@@ -1010,8 +1173,8 @@ static const struct adreno_gpu_funcs funcs = {
.pm_resume = a5xx_pm_resume,
.recover = a5xx_recover,
.submit = a5xx_submit,
-	.flush = adreno_flush,
-	.active_ring = adreno_active_ring,
.flush = a5xx_flush,
.active_ring = a5xx_active_ring,
.irq = a5xx_irq,
.destroy = a5xx_destroy,
#ifdef CONFIG_DEBUG_FS
@@ -1047,7 +1210,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)

a5xx_gpu->lm_leakage = 0x4E001A;

-	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
if (ret) {
a5xx_destroy(&(a5xx_gpu->base.base));
return ERR_PTR(ret);
@@ -1056,5 +1219,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
if (gpu->aspace)
msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

/* Set up the preemption specific bits and pieces for each ringbuffer */
a5xx_preempt_init(gpu);

return gpu;
}
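
The remaining files in this commit (a5xx_gpu.h, a5xx_preempt.c and the msm_gpu/msm_ringbuffer changes) did not render above. For orientation, the per-ring buffer that CP_CONTEXT_SWITCH_SAVE_ADDR and preempt_iova[] refer to is a CP-defined save/restore record; a best-effort reconstruction of its layout (field names are assumptions, not taken from the rendered diff):

	/* One record is allocated per ringbuffer by a5xx_preempt_init();
	 * the CP saves ring state here on preempt-out and reloads it on
	 * preempt-in.
	 */
	struct a5xx_preempt_record {
		uint32_t magic;		/* must hold a magic value to be valid */
		uint32_t info;
		uint32_t data;
		uint32_t cntl;		/* RB_CNTL to restore for the ring */
		uint32_t rptr;
		uint32_t wptr;
		uint64_t rptr_addr;	/* iova where the CP shadows rptr */
		uint64_t rbase;		/* iova of the ring contents */
		uint64_t counter;	/* iova of a per-ring scratch buffer */
	};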
