Merge tag 'drm-xe-fixes-2024-10-31' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes

Driver Changes:
- Fix missing HPD interrupt enabling, bringing one PM refactor with it
  (Imre / Maarten)
- Workaround LNL GGTT invalidation not being visible to GuC
  (Matthew Brost)
- Avoid getting jobs stuck without a protecting timeout (Matthew Brost)

Signed-off-by: Dave Airlie <airlied@redhat.com>

From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/tsbftadm7owyizzdaqnqu7u4tqggxgeqeztlfvmj5fryxlfomi@5m5bfv2zvzmw
commit f99c7cca2f
Dave Airlie, 2024-11-02 04:44:02 +10:00
5 changed files with 75 additions and 38 deletions

File: drivers/gpu/drm/xe/display/xe_display.c

@@ -309,18 +309,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
 }
 
 /* TODO: System and runtime suspend/resume sequences will be sanitized as a follow-up. */
-void xe_display_pm_runtime_suspend(struct xe_device *xe)
-{
-        if (!xe->info.probe_display)
-                return;
-
-        if (xe->d3cold.allowed)
-                xe_display_pm_suspend(xe, true);
-
-        intel_hpd_poll_enable(xe);
-}
-
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_suspend(struct xe_device *xe, bool runtime)
 {
         struct intel_display *display = &xe->display;
         bool s2idle = suspend_to_idle();
@@ -353,6 +342,27 @@ void xe_display_pm_suspend(struct xe_device *xe, bool runtime)
         intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
 
         intel_dmc_suspend(xe);
+
+        if (runtime && has_display(xe))
+                intel_hpd_poll_enable(xe);
+}
+
+void xe_display_pm_suspend(struct xe_device *xe)
+{
+        __xe_display_pm_suspend(xe, false);
+}
+
+void xe_display_pm_runtime_suspend(struct xe_device *xe)
+{
+        if (!xe->info.probe_display)
+                return;
+
+        if (xe->d3cold.allowed) {
+                __xe_display_pm_suspend(xe, true);
+                return;
+        }
+
+        intel_hpd_poll_enable(xe);
 }
 
 void xe_display_pm_suspend_late(struct xe_device *xe)
@@ -366,17 +376,6 @@ void xe_display_pm_suspend_late(struct xe_device *xe)
 
         intel_display_power_suspend_late(xe);
 }
-
-void xe_display_pm_runtime_resume(struct xe_device *xe)
-{
-        if (!xe->info.probe_display)
-                return;
-
-        intel_hpd_poll_disable(xe);
-
-        if (xe->d3cold.allowed)
-                xe_display_pm_resume(xe, true);
-}
 
 void xe_display_pm_resume_early(struct xe_device *xe)
 {
@@ -387,7 +386,7 @@ void xe_display_pm_resume_early(struct xe_device *xe)
         intel_power_domains_resume(xe);
 }
 
-void xe_display_pm_resume(struct xe_device *xe, bool runtime)
+static void __xe_display_pm_resume(struct xe_device *xe, bool runtime)
 {
         struct intel_display *display = &xe->display;
 
@@ -411,9 +410,11 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
                 intel_display_driver_resume(xe);
                 drm_kms_helper_poll_enable(&xe->drm);
                 intel_display_driver_enable_user_access(xe);
-                intel_hpd_poll_disable(xe);
         }
 
+        if (has_display(xe))
+                intel_hpd_poll_disable(xe);
+
         intel_opregion_resume(display);
 
         intel_fbdev_set_suspend(&xe->drm, FBINFO_STATE_RUNNING, false);
@@ -421,6 +422,26 @@ void xe_display_pm_resume(struct xe_device *xe, bool runtime)
         intel_power_domains_enable(xe);
 }
 
+void xe_display_pm_resume(struct xe_device *xe)
+{
+        __xe_display_pm_resume(xe, false);
+}
+
+void xe_display_pm_runtime_resume(struct xe_device *xe)
+{
+        if (!xe->info.probe_display)
+                return;
+
+        if (xe->d3cold.allowed) {
+                __xe_display_pm_resume(xe, true);
+                return;
+        }
+
+        intel_hpd_init(xe);
+
+        intel_hpd_poll_disable(xe);
+}
+
 static void display_device_remove(struct drm_device *dev, void *arg)
 {
         struct xe_device *xe = arg;
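
Note on the display hunks above: system suspend/resume now go through __xe_display_pm_suspend()/__xe_display_pm_resume() with runtime=false, while the runtime handlers take the full path only when d3cold is allowed. The non-d3cold runtime-resume branch is where the "missing HPD interrupt enabling" from the changelog is addressed (the new intel_hpd_init() call). Condensed from the added lines above, with editorial comments that are not part of the patch:

    void xe_display_pm_runtime_resume(struct xe_device *xe)
    {
            if (!xe->info.probe_display)
                    return;

            if (xe->d3cold.allowed) {
                    /* d3cold: full display resume, mirroring system resume */
                    __xe_display_pm_resume(xe, true);
                    return;
            }

            /* non-d3cold: re-enable HPD interrupts, then stop HPD polling */
            intel_hpd_init(xe);
            intel_hpd_poll_disable(xe);
    }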

File: drivers/gpu/drm/xe/display/xe_display.h

@@ -34,10 +34,10 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
 void xe_display_irq_reset(struct xe_device *xe);
 void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
 
-void xe_display_pm_suspend(struct xe_device *xe, bool runtime);
+void xe_display_pm_suspend(struct xe_device *xe);
 void xe_display_pm_suspend_late(struct xe_device *xe);
 void xe_display_pm_resume_early(struct xe_device *xe);
-void xe_display_pm_resume(struct xe_device *xe, bool runtime);
+void xe_display_pm_resume(struct xe_device *xe);
 void xe_display_pm_runtime_suspend(struct xe_device *xe);
 void xe_display_pm_runtime_resume(struct xe_device *xe);
 
@@ -65,10 +65,10 @@ static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
 static inline void xe_display_irq_reset(struct xe_device *xe) {}
 static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
 
-static inline void xe_display_pm_suspend(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_suspend(struct xe_device *xe) {}
 static inline void xe_display_pm_suspend_late(struct xe_device *xe) {}
 static inline void xe_display_pm_resume_early(struct xe_device *xe) {}
-static inline void xe_display_pm_resume(struct xe_device *xe, bool runtime) {}
+static inline void xe_display_pm_resume(struct xe_device *xe) {}
 static inline void xe_display_pm_runtime_suspend(struct xe_device *xe) {}
 static inline void xe_display_pm_runtime_resume(struct xe_device *xe) {}
 

File: drivers/gpu/drm/xe/xe_ggtt.c

@@ -397,6 +397,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
 
 static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
 {
+        struct xe_device *xe = tile_to_xe(ggtt->tile);
+
+        /*
+         * XXX: Barrier for GGTT pages. Unsure exactly why this required but
+         * without this LNL is having issues with the GuC reading scratch page
+         * vs. correct GGTT page. Not particularly a hot code path so blindly
+         * do a mmio read here which results in GuC reading correct GGTT page.
+         */
+        xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
+
         /* Each GT in a tile has its own TLB to cache GGTT lookups */
         ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
         ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);

File: drivers/gpu/drm/xe/xe_guc_submit.c

@@ -916,12 +916,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
 static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
 {
         struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
-        u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
-        u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+        u32 ctx_timestamp, ctx_job_timestamp;
         u32 timeout_ms = q->sched_props.job_timeout_ms;
         u32 diff;
         u64 running_time_ms;
 
+        if (!xe_sched_job_started(job)) {
+                xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
+                           xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
+                           q->guc->id);
+
+                return xe_sched_invalidate_job(job, 2);
+        }
+
+        ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+        ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
+
         /*
          * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
          * possible overflows with a high timeout.
@@ -1049,10 +1059,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
                 exec_queue_killed_or_banned_or_wedged(q) ||
                 exec_queue_destroyed(q);
 
-        /* Job hasn't started, can't be timed out */
-        if (!skip_timeout_check && !xe_sched_job_started(job))
-                goto rearm;
-
         /*
          * XXX: Sampling timeout doesn't work in wedged mode as we have to
          * modify scheduling state to read timestamp. We could read the
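
A note on the retained comment in the first hunk above: the "~223s" is simply the point at which the 32-bit context timestamp wraps at the 19.2 MHz reference clock, i.e. 2^32 / (19.2 * 10^6 Hz) ≈ 223.7 s, hence the care taken about overflow when a large job timeout is configured.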

File: drivers/gpu/drm/xe/xe_pm.c

@@ -123,7 +123,7 @@ int xe_pm_suspend(struct xe_device *xe)
         for_each_gt(gt, xe, id)
                 xe_gt_suspend_prepare(gt);
 
-        xe_display_pm_suspend(xe, false);
+        xe_display_pm_suspend(xe);
 
         /* FIXME: Super racey... */
         err = xe_bo_evict_all(xe);
@@ -133,7 +133,7 @@ int xe_pm_suspend(struct xe_device *xe)
         for_each_gt(gt, xe, id) {
                 err = xe_gt_suspend(gt);
                 if (err) {
-                        xe_display_pm_resume(xe, false);
+                        xe_display_pm_resume(xe);
                         goto err;
                 }
         }
@@ -187,7 +187,7 @@ int xe_pm_resume(struct xe_device *xe)
         for_each_gt(gt, xe, id)
                 xe_gt_resume(gt);
 
-        xe_display_pm_resume(xe, false);
+        xe_display_pm_resume(xe);
 
         err = xe_bo_restore_user(xe);
         if (err)