mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-22 23:13:01 +02:00
mm: swap: correctly use maxpages in swapon syscall to avoid potential deadloop
We use maxpages from read_swap_header() to initialize swap_info_struct;
however, maxpages might be reduced in setup_swap_extents(), and
si->max is assigned the reduced maxpages from
setup_swap_extents().
Obviously, this could lead to memory waste, as we allocated memory based on
the larger maxpages. Besides, this could lead to a potential dead loop, as
follows:
1) When calling setup_clusters() with the larger maxpages, unavailable
pages within the range [si->max, larger maxpages) are not accounted for with
inc_cluster_info_page(). As a result, these pages are assumed to be
available but cannot be allocated. The cluster containing these pages
can be moved to the frag_clusters list after all of its available pages have
been allocated.
2) When the cluster mentioned in 1) is the only cluster in the
frag_clusters list, cluster_alloc_swap_entry() assumes that an order-0
allocation will never fail, and enters a dead loop by repeatedly trying
to allocate a page from the only cluster in frag_clusters, which contains
no actually available pages.
Call setup_swap_extents() to get the final maxpages before
swap_info_struct initialization to fix the issue.
After this change, span will include bad blocks and will become a larger
value, which I think is the correct value:
In summary, there are two kinds of swapfile_activate operations.
1. Filesystem style: Treat all blocks as logically contiguous and find
usable physical extents in the logical range. In this way, si->pages will
be the number of actual usable physical blocks and span will be
"1 + highest_block - lowest_block".
2. Block device style: Treat all blocks as physically contiguous, so only
one single extent is added. In this way, si->pages will be si->max and
span will be "si->pages - 1". Actually, si->pages and si->max are only
used in block device style, and the span value is set with si->pages. As a
result, the span value in block device style will become a larger value, as
you mentioned.
I think larger value is correct based on:
1. The span value in filesystem style is "1 + highest_block -
lowest_block", which is the range covering all possible physical blocks,
including the bad blocks.
2. For block device style, si->pages is the actual usable block number
and is already in pr_info. The original span value before this patch
also refers to the usable block number, which is redundant in pr_info.
[shikemeng@huaweicloud.com: ensure si->pages == si->max - 1 after setup_swap_extents()]
Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20250718065139.61989-1-shikemeng@huaweicloud.com
Link: https://lkml.kernel.org/r/20250522122554.12209-3-shikemeng@huaweicloud.com
Fixes: 661383c611
("mm: swap: relaim the cached parts that got scanned")
Signed-off-by: Kemeng Shi <shikemeng@huaweicloud.com>
Reviewed-by: Baoquan He <bhe@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kairui Song <kasong@tencent.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
parent
4f78252da8
commit
255116c5b0
|
@ -3141,43 +3141,30 @@ static unsigned long read_swap_header(struct swap_info_struct *si,
|
|||
return maxpages;
|
||||
}
|
||||
|
||||
static int setup_swap_map_and_extents(struct swap_info_struct *si,
|
||||
union swap_header *swap_header,
|
||||
unsigned char *swap_map,
|
||||
unsigned long maxpages,
|
||||
sector_t *span)
|
||||
static int setup_swap_map(struct swap_info_struct *si,
|
||||
union swap_header *swap_header,
|
||||
unsigned char *swap_map,
|
||||
unsigned long maxpages)
|
||||
{
|
||||
unsigned int nr_good_pages;
|
||||
unsigned long i;
|
||||
int nr_extents;
|
||||
|
||||
nr_good_pages = maxpages - 1; /* omit header page */
|
||||
|
||||
swap_map[0] = SWAP_MAP_BAD; /* omit header page */
|
||||
for (i = 0; i < swap_header->info.nr_badpages; i++) {
|
||||
unsigned int page_nr = swap_header->info.badpages[i];
|
||||
if (page_nr == 0 || page_nr > swap_header->info.last_page)
|
||||
return -EINVAL;
|
||||
if (page_nr < maxpages) {
|
||||
swap_map[page_nr] = SWAP_MAP_BAD;
|
||||
nr_good_pages--;
|
||||
si->pages--;
|
||||
}
|
||||
}
|
||||
|
||||
if (nr_good_pages) {
|
||||
swap_map[0] = SWAP_MAP_BAD;
|
||||
si->max = maxpages;
|
||||
si->pages = nr_good_pages;
|
||||
nr_extents = setup_swap_extents(si, span);
|
||||
if (nr_extents < 0)
|
||||
return nr_extents;
|
||||
nr_good_pages = si->pages;
|
||||
}
|
||||
if (!nr_good_pages) {
|
||||
if (!si->pages) {
|
||||
pr_warn("Empty swap-file\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return nr_extents;
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define SWAP_CLUSTER_INFO_COLS \
|
||||
|
@ -3217,7 +3204,7 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
|
|||
* Mark unusable pages as unavailable. The clusters aren't
|
||||
* marked free yet, so no list operations are involved yet.
|
||||
*
|
||||
* See setup_swap_map_and_extents(): header page, bad pages,
|
||||
* See setup_swap_map(): header page, bad pages,
|
||||
* and the EOF part of the last cluster.
|
||||
*/
|
||||
inc_cluster_info_page(si, cluster_info, 0);
|
||||
|
@ -3363,6 +3350,21 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
|
|||
goto bad_swap_unlock_inode;
|
||||
}
|
||||
|
||||
si->max = maxpages;
|
||||
si->pages = maxpages - 1;
|
||||
nr_extents = setup_swap_extents(si, &span);
|
||||
if (nr_extents < 0) {
|
||||
error = nr_extents;
|
||||
goto bad_swap_unlock_inode;
|
||||
}
|
||||
if (si->pages != si->max - 1) {
|
||||
pr_err("swap:%u != (max:%u - 1)\n", si->pages, si->max);
|
||||
error = -EINVAL;
|
||||
goto bad_swap_unlock_inode;
|
||||
}
|
||||
|
||||
maxpages = si->max;
|
||||
|
||||
/* OK, set up the swap map and apply the bad block list */
|
||||
swap_map = vzalloc(maxpages);
|
||||
if (!swap_map) {
|
||||
|
@ -3374,12 +3376,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
|
|||
if (error)
|
||||
goto bad_swap_unlock_inode;
|
||||
|
||||
nr_extents = setup_swap_map_and_extents(si, swap_header, swap_map,
|
||||
maxpages, &span);
|
||||
if (unlikely(nr_extents < 0)) {
|
||||
error = nr_extents;
|
||||
error = setup_swap_map(si, swap_header, swap_map, maxpages);
|
||||
if (error)
|
||||
goto bad_swap_unlock_inode;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use kvmalloc_array instead of bitmap_zalloc as the allocation order might
|
||||
|
|
Loading…
Reference in New Issue
Block a user