mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-22 23:13:01 +02:00
amd/amdkfd: correct mem limit calculation for small APUs
[ Upstream commit 53503556273a5ead8b75534085e2dcb46e96f883 ] Current mem limit check leaks some GTT memory (reserved_for_pt reserved_for_ras + adev->vram_pin_size) for small APUs. Since carveout VRAM is tunable on APUs, there are three case regarding the carveout VRAM size relative to GTT: 1. 0 < carveout < gtt apu_prefer_gtt = true, is_app_apu = false 2. carveout > gtt / 2 apu_prefer_gtt = false, is_app_apu = false 3. 0 = carveout apu_prefer_gtt = true, is_app_apu = true It doesn't make sense to check below limitation in case 1 (default case, small carveout) because the values in the below expression are mixed with carveout and gtt. adev->kfd.vram_used[xcp_id] + vram_needed > vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size) gtt: kfd.vram_used, vram_needed, vram_size carveout: reserved_for_pt, reserved_for_ras, adev->vram_pin_size In case 1, vram allocation will go to gtt domain, skip vram check since ttm_mem_limit check already cover this allocation. Signed-off-by: Yifan Zhang <yifan1.zhang@amd.com> Reviewed-by: Mario Limonciello <mario.limonciello@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com> (cherry picked from commit fa7c99f04f6dd299388e9282812b14e95558ac8e) Signed-off-by: Sasha Levin <sashal@kernel.org>
This commit is contained in:
parent
a01d1325e0
commit
0bcc5ea4bb
|
@ -213,19 +213,35 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
|
|||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
|
||||
if (kfd_mem_limit.system_mem_used + system_mem_needed >
|
||||
kfd_mem_limit.max_system_mem_limit)
|
||||
kfd_mem_limit.max_system_mem_limit) {
|
||||
pr_debug("Set no_system_mem_limit=1 if using shared memory\n");
|
||||
if (!no_system_mem_limit) {
|
||||
ret = -ENOMEM;
|
||||
goto release;
|
||||
}
|
||||
}
|
||||
|
||||
if ((kfd_mem_limit.system_mem_used + system_mem_needed >
|
||||
kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) ||
|
||||
(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) ||
|
||||
(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed >
|
||||
vram_size - reserved_for_pt - reserved_for_ras - atomic64_read(&adev->vram_pin_size))) {
|
||||
if (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
|
||||
kfd_mem_limit.max_ttm_mem_limit) {
|
||||
ret = -ENOMEM;
|
||||
goto release;
|
||||
}
|
||||
|
||||
/*if is_app_apu is false and apu_prefer_gtt is true, it is an APU with
|
||||
* carve out < gtt. In that case, VRAM allocation will go to gtt domain, skip
|
||||
* VRAM check since ttm_mem_limit check already cover this allocation
|
||||
*/
|
||||
|
||||
if (adev && xcp_id >= 0 && (!adev->apu_prefer_gtt || adev->gmc.is_app_apu)) {
|
||||
uint64_t vram_available =
|
||||
vram_size - reserved_for_pt - reserved_for_ras -
|
||||
atomic64_read(&adev->vram_pin_size);
|
||||
if (adev->kfd.vram_used[xcp_id] + vram_needed > vram_available) {
|
||||
ret = -ENOMEM;
|
||||
goto release;
|
||||
}
|
||||
}
|
||||
|
||||
/* Update memory accounting by decreasing available system
|
||||
* memory, TTM memory and GPU memory as computed above
|
||||
*/
|
||||
|
@ -1626,11 +1642,15 @@ size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
|
|||
uint64_t vram_available, system_mem_available, ttm_mem_available;
|
||||
|
||||
spin_lock(&kfd_mem_limit.mem_limit_lock);
|
||||
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
|
||||
- adev->kfd.vram_used_aligned[xcp_id]
|
||||
- atomic64_read(&adev->vram_pin_size)
|
||||
- reserved_for_pt
|
||||
- reserved_for_ras;
|
||||
if (adev->apu_prefer_gtt && !adev->gmc.is_app_apu)
|
||||
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
|
||||
- adev->kfd.vram_used_aligned[xcp_id];
|
||||
else
|
||||
vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id)
|
||||
- adev->kfd.vram_used_aligned[xcp_id]
|
||||
- atomic64_read(&adev->vram_pin_size)
|
||||
- reserved_for_pt
|
||||
- reserved_for_ras;
|
||||
|
||||
if (adev->apu_prefer_gtt) {
|
||||
system_mem_available = no_system_mem_limit ?
|
||||
|
|
Loading…
Reference in New Issue
Block a user