drm/amdgpu: cleanup conditional execution
First of all calculating the number of dw to patch into a conditional
execution is not something HW generation specific. This is just standard
ring buffer calculations.

While at it also reduce the BUG_ON() into WARN_ON().

Then instead of a random bit pattern use 0 as default value for the number
of dw skipped, this way it's not mandatory any more to patch the
conditional execution.

And last make the address to check a parameter of the conditional execution
instead of getting this from the ring.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
commit c68cbbfd54
parent 86e14a7386
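The dw count that init_cond_exec reserves and that later gets patched is plain ring-buffer arithmetic, which is why it can move into one generic helper instead of per-IP copies. Below is a minimal, standalone sketch of that wrap-around calculation; the toy_ring structure, toy_patch_cond_exec helper and the example values are illustrative stand-ins, not the driver's real types.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Toy stand-ins for the ring state the helper needs; illustrative types,
 * not the driver's struct amdgpu_ring. */
struct toy_ring {
	uint32_t *ring;		/* command buffer, ring_size / 4 dwords long */
	uint32_t ring_size;	/* buffer size in bytes, power of two */
	uint32_t buf_mask;	/* (ring_size / 4) - 1 */
	uint64_t wptr;		/* write pointer in dwords, masked on use */
};

/* Same arithmetic as the generic patch helper: the number of dwords to skip
 * is the distance from the reserved slot to the current write position,
 * corrected when the write position has already wrapped around. */
static void toy_patch_cond_exec(struct toy_ring *ring, unsigned int offset)
{
	unsigned int cur;

	assert(offset <= ring->buf_mask);
	assert(ring->ring[offset] == 0);	/* placeholder defaults to 0 */

	cur = (ring->wptr - 1) & ring->buf_mask;
	if (cur < offset)
		cur += ring->ring_size >> 2;	/* wrapped: add size in dwords */
	ring->ring[offset] = cur - offset;
}

int main(void)
{
	uint32_t buf[16] = { 0 };
	struct toy_ring ring = {
		.ring = buf,
		.ring_size = sizeof(buf),
		.buf_mask = 15,
		.wptr = 3,	/* the placeholder sits at slot 2 */
	};

	ring.wptr += 11;	/* 11 more dwords emitted after the placeholder */
	toy_patch_cond_exec(&ring, 2);
	printf("dwords to skip: %u\n", (unsigned int)buf[2]);	/* prints 11 */
	return 0;
}

Because the reserved slot now defaults to 0 instead of the old 0x55aa55aa marker, a cond_exec packet that is never patched simply encodes zero dwords to skip, which is what makes the patching step optional.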
@@ -131,7 +131,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	struct amdgpu_ib *ib = &ibs[0];
 	struct dma_fence *tmp = NULL;
 	bool need_ctx_switch;
-	unsigned int patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
 	uint32_t status = 0, alloc_size;
@@ -139,10 +138,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	bool secure, init_shadow;
 	u64 shadow_va, csa_va, gds_va;
 	int vmid = AMDGPU_JOB_GET_VMID(job);
+	bool need_pipe_sync = false;
+	unsigned int cond_exec;
 
 	unsigned int i;
 	int r = 0;
-	bool need_pipe_sync = false;
 
 	if (num_ibs == 0)
 		return -EINVAL;
@@ -228,7 +228,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 					    init_shadow, vmid);
 
 	if (ring->funcs->init_cond_exec)
-		patch_offset = amdgpu_ring_init_cond_exec(ring);
+		cond_exec = amdgpu_ring_init_cond_exec(ring,
+						       ring->cond_exe_gpu_addr);
 
 	amdgpu_device_flush_hdp(adev, ring);
 
@@ -278,16 +279,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 				       fence_flags | AMDGPU_FENCE_FLAG_64BIT);
 	}
 
-	if (ring->funcs->emit_gfx_shadow) {
+	if (ring->funcs->emit_gfx_shadow && ring->funcs->init_cond_exec) {
 		amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0);
-
-		if (ring->funcs->init_cond_exec) {
-			unsigned int ce_offset = ~0;
-
-			ce_offset = amdgpu_ring_init_cond_exec(ring);
-			if (ce_offset != ~0 && ring->funcs->patch_cond_exec)
-				amdgpu_ring_patch_cond_exec(ring, ce_offset);
-		}
+		amdgpu_ring_init_cond_exec(ring, ring->cond_exe_gpu_addr);
 	}
 
 	r = amdgpu_fence_emit(ring, f, job, fence_flags);
@@ -302,8 +296,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned int num_ibs,
 	if (ring->funcs->insert_end)
 		ring->funcs->insert_end(ring);
 
-	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
-		amdgpu_ring_patch_cond_exec(ring, patch_offset);
+	amdgpu_ring_patch_cond_exec(ring, cond_exec);
 
 	ring->current_ctx = fence_ctx;
 	if (vm && ring->funcs->emit_switch_buffer)
@@ -209,8 +209,7 @@ struct amdgpu_ring_funcs {
 	void (*insert_end)(struct amdgpu_ring *ring);
 	/* pad the indirect buffer to the necessary number of dw */
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
-	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
-	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+	unsigned (*init_cond_exec)(struct amdgpu_ring *ring, uint64_t addr);
 	/* note usage for clock and power gating */
 	void (*begin_use)(struct amdgpu_ring *ring);
 	void (*end_use)(struct amdgpu_ring *ring);
@@ -327,8 +326,7 @@ struct amdgpu_ring {
 #define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m))
 #define amdgpu_ring_emit_frame_cntl(r, b, s) (r)->funcs->emit_frame_cntl((r), (b), (s))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
-#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
-#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
+#define amdgpu_ring_init_cond_exec(r, a) (r)->funcs->init_cond_exec((r), (a))
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
 #define amdgpu_ring_patch_cntl(r, o) ((r)->funcs->patch_cntl((r), (o)))
 #define amdgpu_ring_patch_ce(r, o) ((r)->funcs->patch_ce((r), (o)))
@@ -411,6 +409,30 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 	ring->count_dw -= count_dw;
 }
 
+/**
+ * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
+ * @ring: amdgpu_ring structure
+ * @offset: offset returned by amdgpu_ring_init_cond_exec
+ *
+ * Calculate the dw count and patch it into a cond_exec command.
+ */
+static inline void amdgpu_ring_patch_cond_exec(struct amdgpu_ring *ring,
+					       unsigned int offset)
+{
+	unsigned cur;
+
+	if (!ring->funcs->init_cond_exec)
+		return;
+
+	WARN_ON(offset > ring->buf_mask);
+	WARN_ON(ring->ring[offset] != 0);
+
+	cur = (ring->wptr - 1) & ring->buf_mask;
+	if (cur < offset)
+		cur += ring->ring_size >> 2;
+	ring->ring[offset] = cur - offset;
+}
+
 #define amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset)			\
 	(ring->is_mes_queue && ring->mes_ctx ?				\
 	 (ring->mes_ctx->meta_data_gpu_addr + offset) : 0)
@@ -658,7 +658,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	bool vm_flush_needed = job->vm_needs_flush;
 	struct dma_fence *fence = NULL;
 	bool pasid_mapping_needed = false;
-	unsigned patch_offset = 0;
+	unsigned int patch;
 	int r;
 
 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
@@ -685,7 +685,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 
 	amdgpu_ring_ib_begin(ring);
 	if (ring->funcs->init_cond_exec)
-		patch_offset = amdgpu_ring_init_cond_exec(ring);
+		patch = amdgpu_ring_init_cond_exec(ring,
+						   ring->cond_exe_gpu_addr);
 
 	if (need_pipe_sync)
 		amdgpu_ring_emit_pipeline_sync(ring);
@@ -733,8 +734,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	}
 	dma_fence_put(fence);
 
-	if (ring->funcs->patch_cond_exec)
-		amdgpu_ring_patch_cond_exec(ring, patch_offset);
+	amdgpu_ring_patch_cond_exec(ring, patch);
 
 	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
 	if (ring->funcs->emit_switch_buffer) {
@@ -546,34 +546,21 @@ static void vpe_ring_emit_vm_flush(struct amdgpu_ring *ring, unsigned int vmid,
 	amdgpu_gmc_emit_flush_gpu_tlb(ring, vmid, pd_addr);
 }
 
-static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int vpe_ring_init_cond_exec(struct amdgpu_ring *ring,
+					    uint64_t addr)
 {
 	unsigned int ret;
 
 	amdgpu_ring_write(ring, VPE_CMD_HEADER(VPE_CMD_OPCODE_COND_EXE, 0));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
 	amdgpu_ring_write(ring, 1);
-	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+	ret = ring->wptr & ring->buf_mask;
+	amdgpu_ring_write(ring, 0);
 
 	return ret;
 }
 
-static void vpe_ring_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
-{
-	unsigned int cur;
-
-	WARN_ON_ONCE(offset > ring->buf_mask);
-	WARN_ON_ONCE(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr - 1) & ring->buf_mask;
-	if (cur > offset)
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 static int vpe_ring_preempt_ib(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
@@ -864,7 +851,6 @@ static const struct amdgpu_ring_funcs vpe_ring_funcs = {
 	.test_ring = vpe_ring_test_ring,
 	.test_ib = vpe_ring_test_ib,
 	.init_cond_exec = vpe_ring_init_cond_exec,
-	.patch_cond_exec = vpe_ring_patch_cond_exec,
 	.preempt_ib = vpe_ring_preempt_ib,
 	.begin_use = vpe_ring_begin_use,
 	.end_use = vpe_ring_end_use,
@@ -8542,34 +8542,23 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, 0);
 }
 
-static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+							uint64_t addr)
 {
 	unsigned int ret;
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	/* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, 0);
 	ret = ring->wptr & ring->buf_mask;
-	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+	/* patch dummy value later */
+	amdgpu_ring_write(ring, 0);
 
 	return ret;
 }
 
-static void gfx_v10_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned int offset)
-{
-	unsigned int cur;
-
-	BUG_ON(offset > ring->buf_mask);
-	BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr - 1) & ring->buf_mask;
-	if (likely(cur > offset))
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 static int gfx_v10_0_ring_preempt_ib(struct amdgpu_ring *ring)
 {
 	int i, r = 0;
@@ -9224,7 +9213,6 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
 	.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
 	.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v10_0_ring_emit_init_cond_exec,
-	.patch_cond_exec = gfx_v10_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v10_0_ring_preempt_ib,
 	.emit_frame_cntl = gfx_v10_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v10_0_ring_emit_wreg,
@@ -5533,33 +5533,23 @@ static void gfx_v11_0_ring_emit_gfx_shadow(struct amdgpu_ring *ring,
 			       PACKET3_SET_Q_PREEMPTION_MODE_INIT_SHADOW_MEM : 0);
 }
 
-static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v11_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+						   uint64_t addr)
 {
 	unsigned ret;
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	/* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, 0);
 	ret = ring->wptr & ring->buf_mask;
-	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+	/* patch dummy value later */
+	amdgpu_ring_write(ring, 0);
 
 	return ret;
 }
 
-static void gfx_v11_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
-	unsigned cur;
-	BUG_ON(offset > ring->buf_mask);
-	BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr - 1) & ring->buf_mask;
-	if (likely(cur > offset))
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 static int gfx_v11_0_ring_preempt_ib(struct amdgpu_ring *ring)
 {
 	int i, r = 0;
@@ -6153,7 +6143,6 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v11_0_ring_emit_cntxcntl,
 	.emit_gfx_shadow = gfx_v11_0_ring_emit_gfx_shadow,
 	.init_cond_exec = gfx_v11_0_ring_emit_init_cond_exec,
-	.patch_cond_exec = gfx_v11_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v11_0_ring_preempt_ib,
 	.emit_frame_cntl = gfx_v11_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v11_0_ring_emit_wreg,
@@ -6326,33 +6326,22 @@ static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	amdgpu_ring_write(ring, 0);
 }
 
-static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v8_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+						  uint64_t addr)
 {
 	unsigned ret;
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	/* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, 0);
 	ret = ring->wptr & ring->buf_mask;
-	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+	/* patch dummy value later */
+	amdgpu_ring_write(ring, 0);
 	return ret;
 }
 
-static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
-	unsigned cur;
-
-	BUG_ON(offset > ring->buf_mask);
-	BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr & ring->buf_mask) - 1;
-	if (likely(cur > offset))
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->ring_size >> 2) - offset + cur;
-}
-
 static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
 				    uint32_t reg_val_offs)
 {
@@ -6932,7 +6921,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
 	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec,
-	.patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec,
 	.emit_wreg = gfx_v8_0_ring_emit_wreg,
 	.soft_recovery = gfx_v8_0_ring_soft_recovery,
 	.emit_mem_sync = gfx_v8_0_emit_mem_sync,
@@ -5610,31 +5610,21 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 	amdgpu_ring_write(ring, 0);
 }
 
-static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned gfx_v9_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
+						  uint64_t addr)
 {
 	unsigned ret;
 	amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, 0); /* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
+	/* discard following DWs if *cond_exec_gpu_addr==0 */
+	amdgpu_ring_write(ring, 0);
 	ret = ring->wptr & ring->buf_mask;
-	amdgpu_ring_write(ring, 0x55aa55aa); /* patch dummy value later */
+	/* patch dummy value later */
+	amdgpu_ring_write(ring, 0);
 	return ret;
 }
 
-static void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigned offset)
-{
-	unsigned cur;
-	BUG_ON(offset > ring->buf_mask);
-	BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr - 1) & ring->buf_mask;
-	if (likely(cur > offset))
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->ring_size>>2) - offset + cur;
-}
-
 static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
 				    uint32_t reg_val_offs)
 {
@@ -6908,7 +6898,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
-	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
 	.preempt_ib = gfx_v9_0_ring_preempt_ib,
 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
@@ -6963,7 +6952,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
 	.emit_switch_buffer = gfx_v9_ring_emit_sb,
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
-	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
@@ -249,35 +249,23 @@ static int sdma_v5_0_init_microcode(struct amdgpu_device *adev)
 	return ret;
 }
 
-static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+					      uint64_t addr)
 {
 	unsigned ret;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
 	amdgpu_ring_write(ring, 1);
-	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+	/* this is the offset we need patch later */
+	ret = ring->wptr & ring->buf_mask;
+	/* insert dummy here and patch it later */
+	amdgpu_ring_write(ring, 0);
 
 	return ret;
 }
 
-static void sdma_v5_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
-					   unsigned offset)
-{
-	unsigned cur;
-
-	BUG_ON(offset > ring->buf_mask);
-	BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr - 1) & ring->buf_mask;
-	if (cur > offset)
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 /**
  * sdma_v5_0_ring_get_rptr - get the current read pointer
  *
@@ -1780,7 +1768,6 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
 	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = sdma_v5_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_0_ring_init_cond_exec,
-	.patch_cond_exec = sdma_v5_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_0_ring_preempt_ib,
 };
 
@@ -89,35 +89,23 @@ static u32 sdma_v5_2_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
 	return base + internal_offset;
 }
 
-static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v5_2_ring_init_cond_exec(struct amdgpu_ring *ring,
+					      uint64_t addr)
 {
 	unsigned ret;
 
 	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COND_EXE));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
 	amdgpu_ring_write(ring, 1);
-	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+	/* this is the offset we need patch later */
+	ret = ring->wptr & ring->buf_mask;
+	/* insert dummy here and patch it later */
+	amdgpu_ring_write(ring, 0);
 
 	return ret;
 }
 
-static void sdma_v5_2_ring_patch_cond_exec(struct amdgpu_ring *ring,
-					   unsigned offset)
-{
-	unsigned cur;
-
-	BUG_ON(offset > ring->buf_mask);
-	BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr - 1) & ring->buf_mask;
-	if (cur > offset)
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 /**
  * sdma_v5_2_ring_get_rptr - get the current read pointer
  *
@@ -1722,7 +1710,6 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
 	.emit_reg_wait = sdma_v5_2_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = sdma_v5_2_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v5_2_ring_init_cond_exec,
-	.patch_cond_exec = sdma_v5_2_ring_patch_cond_exec,
 	.preempt_ib = sdma_v5_2_ring_preempt_ib,
 };
 
@@ -80,35 +80,23 @@ static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u3
 	return base + internal_offset;
 }
 
-static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring)
+static unsigned sdma_v6_0_ring_init_cond_exec(struct amdgpu_ring *ring,
+					      uint64_t addr)
 {
 	unsigned ret;
 
 	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COND_EXE));
-	amdgpu_ring_write(ring, lower_32_bits(ring->cond_exe_gpu_addr));
-	amdgpu_ring_write(ring, upper_32_bits(ring->cond_exe_gpu_addr));
+	amdgpu_ring_write(ring, lower_32_bits(addr));
+	amdgpu_ring_write(ring, upper_32_bits(addr));
 	amdgpu_ring_write(ring, 1);
-	ret = ring->wptr & ring->buf_mask;/* this is the offset we need patch later */
-	amdgpu_ring_write(ring, 0x55aa55aa);/* insert dummy here and patch it later */
+	/* this is the offset we need patch later */
+	ret = ring->wptr & ring->buf_mask;
+	/* insert dummy here and patch it later */
+	amdgpu_ring_write(ring, 0);
 
 	return ret;
 }
 
-static void sdma_v6_0_ring_patch_cond_exec(struct amdgpu_ring *ring,
-					   unsigned offset)
-{
-	unsigned cur;
-
-	BUG_ON(offset > ring->buf_mask);
-	BUG_ON(ring->ring[offset] != 0x55aa55aa);
-
-	cur = (ring->wptr - 1) & ring->buf_mask;
-	if (cur > offset)
-		ring->ring[offset] = cur - offset;
-	else
-		ring->ring[offset] = (ring->buf_mask + 1) - offset + cur;
-}
-
 /**
  * sdma_v6_0_ring_get_rptr - get the current read pointer
  *
@@ -1542,7 +1530,6 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
 	.emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
 	.emit_reg_write_reg_wait = sdma_v6_0_ring_emit_reg_write_reg_wait,
 	.init_cond_exec = sdma_v6_0_ring_init_cond_exec,
-	.patch_cond_exec = sdma_v6_0_ring_patch_cond_exec,
 	.preempt_ib = sdma_v6_0_ring_preempt_ib,
 };
 