Revert "FROMLIST: BACKPORT: mm: swap: swap cluster switch to double link list"

This reverts commit 6b195c7c13.

Signed-off-by: Chris Li <chrisl@kernel.org>
Bug: 313807618
Bug: 351082780
Change-Id: I58adbf1e009cd478353e056047dbc58a151af9b7
Signed-off-by: Kalesh Singh <kaleshsingh@google.com>
This commit is contained in:
Chris Li 2024-06-25 11:27:56 -07:00 committed by Treehugger Robot
parent 887a20b67d
commit 2b24dd9d53
2 changed files with 185 additions and 70 deletions

View File

@ -254,22 +254,23 @@ enum {
* space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
* free clusters are organized into a list. We fetch an entry from the list to * free clusters are organized into a list. We fetch an entry from the list to
* get a free cluster. * get a free cluster.
*
* The data field stores next cluster if the cluster is free or cluster usage
* counter otherwise. The flags field determines if a cluster is free. This is
* protected by swap_info_struct.lock.
*/ */
struct swap_cluster_info { struct swap_cluster_info {
spinlock_t lock; /* spinlock_t lock; /*
* Protect swap_cluster_info count and state * Protect swap_cluster_info fields
* field and swap_info_struct->swap_map * and swap_info_struct->swap_map
* elements correspond to the swap * elements correspond to the swap
* cluster * cluster
*/ */
unsigned int count:12; unsigned int data:24;
unsigned int state:3; unsigned int flags:8;
struct list_head list; /* Protected by swap_info_struct->lock */
}; };
#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
#define CLUSTER_STATE_FREE 1 /* This cluster is free */ #define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */
#define CLUSTER_STATE_PER_CPU 2 /* This cluster on per_cpu_cluster */
/* /*
* The first page in the swap file is the swap header, which is always marked * The first page in the swap file is the swap header, which is always marked
@ -294,6 +295,11 @@ struct percpu_cluster {
unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */ unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */
}; };
struct swap_cluster_list {
struct swap_cluster_info head;
struct swap_cluster_info tail;
};
/* /*
* The in-memory structure used to track swap areas. * The in-memory structure used to track swap areas.
*/ */
@ -306,7 +312,7 @@ struct swap_info_struct {
unsigned int max; /* extent of the swap_map */ unsigned int max; /* extent of the swap_map */
unsigned char *swap_map; /* vmalloc'ed array of usage counts */ unsigned char *swap_map; /* vmalloc'ed array of usage counts */
struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */ struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
struct list_head free_clusters; /* free clusters list */ struct swap_cluster_list free_clusters; /* free clusters list */
unsigned int lowest_bit; /* index of first free in swap_map */ unsigned int lowest_bit; /* index of first free in swap_map */
unsigned int highest_bit; /* index of last free in swap_map */ unsigned int highest_bit; /* index of last free in swap_map */
unsigned int pages; /* total of usable pages of swap */ unsigned int pages; /* total of usable pages of swap */
@ -338,7 +344,7 @@ struct swap_info_struct {
* list. * list.
*/ */
struct work_struct discard_work; /* discard worker */ struct work_struct discard_work; /* discard worker */
struct list_head discard_clusters; /* discard clusters list */ struct swap_cluster_list discard_clusters; /* discard clusters list */
struct plist_node avail_lists[]; /* struct plist_node avail_lists[]; /*
* entries in swap_avail_heads, one * entries in swap_avail_heads, one
* entry per node. * entry per node.

View File

@ -289,9 +289,62 @@ static void discard_swap_cluster(struct swap_info_struct *si,
#endif #endif
#define LATENCY_LIMIT 256 #define LATENCY_LIMIT 256
/* Overwrite the cluster's flags field with @flag (no read-modify-write). */
static inline void cluster_set_flag(struct swap_cluster_info *info,
				    unsigned int flag)
{
	info->flags = flag;
}
/*
 * Usage count of the cluster.  The data field holds the usage counter
 * when the cluster is allocated (and the next-cluster index when it is
 * on a free/discard list) -- see the struct swap_cluster_info comment.
 */
static inline unsigned int cluster_count(struct swap_cluster_info *info)
{
	return info->data;
}
/* Store usage count @c into the shared data field of the cluster. */
static inline void cluster_set_count(struct swap_cluster_info *info,
				     unsigned int c)
{
	info->data = c;
}
/*
 * Set the usage count and flags of a cluster together.  The fields are
 * protected by swap_info_struct.lock per the struct swap_cluster_info
 * description, so the two stores need not be a single atomic update.
 */
static inline void cluster_set_count_flag(struct swap_cluster_info *info,
					  unsigned int c, unsigned int f)
{
	info->flags = f;
	info->data = c;
}
/*
 * Index of the next cluster.  Only meaningful while the cluster is
 * linked on a free/discard list, when the data field stores the
 * next-cluster index rather than a usage count.
 */
static inline unsigned int cluster_next(struct swap_cluster_info *info)
{
	return info->data;
}
/* Store next-cluster index @n into the shared data field. */
static inline void cluster_set_next(struct swap_cluster_info *info,
				    unsigned int n)
{
	info->data = n;
}
/*
 * Set the next-cluster index and flags together (used when splicing a
 * cluster onto a swap_cluster_list's head/tail entries).
 */
static inline void cluster_set_next_flag(struct swap_cluster_info *info,
					 unsigned int n, unsigned int f)
{
	info->flags = f;
	info->data = n;
}
static inline bool cluster_is_free(struct swap_cluster_info *info) static inline bool cluster_is_free(struct swap_cluster_info *info)
{ {
return info->state == CLUSTER_STATE_FREE; return info->flags & CLUSTER_FLAG_FREE;
}
/*
 * True when the entry carries CLUSTER_FLAG_NEXT_NULL, i.e. it has no
 * next cluster; used on list head/tail entries to detect an empty list.
 */
static inline bool cluster_is_null(struct swap_cluster_info *info)
{
	return info->flags & CLUSTER_FLAG_NEXT_NULL;
}
static inline void cluster_set_null(struct swap_cluster_info *info)
{
info->flags = CLUSTER_FLAG_NEXT_NULL;
info->data = 0;
} }
static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si, static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
@ -340,11 +393,65 @@ static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
spin_unlock(&si->lock); spin_unlock(&si->lock);
} }
/* A cluster list is empty when its head entry carries the NULL marker. */
static inline bool cluster_list_empty(struct swap_cluster_list *list)
{
	return cluster_is_null(&list->head);
}
/*
 * Cluster index of the first entry (the head's next field).
 * NOTE(review): returns 0 on an empty list (cluster_set_null zeroes
 * data) -- callers presumably check cluster_list_empty() first.
 */
static inline unsigned int cluster_list_first(struct swap_cluster_list *list)
{
	return cluster_next(&list->head);
}
/* Initialize to the empty state: head and tail both get the NULL marker. */
static void cluster_list_init(struct swap_cluster_list *list)
{
	cluster_set_null(&list->head);
	cluster_set_null(&list->tail);
}
/*
 * Append cluster @idx to @list.  @ci is the base of the cluster_info
 * array; the list stores indices into it, chained through each
 * cluster's data field.
 */
static void cluster_list_add_tail(struct swap_cluster_list *list,
				  struct swap_cluster_info *ci,
				  unsigned int idx)
{
	struct swap_cluster_info *tail_ci;

	if (cluster_list_empty(list)) {
		/* First entry: head and tail both point at idx. */
		cluster_set_next_flag(&list->head, idx, 0);
		cluster_set_next_flag(&list->tail, idx, 0);
		return;
	}

	/*
	 * Nested cluster lock, but both cluster locks are only
	 * acquired while swap_info_struct->lock is held.
	 */
	tail_ci = ci + cluster_next(&list->tail);
	spin_lock_nested(&tail_ci->lock, SINGLE_DEPTH_NESTING);
	cluster_set_next(tail_ci, idx);
	spin_unlock(&tail_ci->lock);

	/* Move the tail to the newly linked cluster. */
	cluster_set_next_flag(&list->tail, idx, 0);
}
/*
 * Unlink and return the index of the first cluster on @list.
 * @ci is the base of the cluster_info array the list indices refer to.
 */
static unsigned int cluster_list_del_first(struct swap_cluster_list *list,
					   struct swap_cluster_info *ci)
{
	unsigned int idx = cluster_next(&list->head);

	if (idx == cluster_next(&list->tail)) {
		/* Removing the only entry: the list becomes empty. */
		cluster_set_null(&list->head);
		cluster_set_null(&list->tail);
	} else {
		/* Advance the head to the removed cluster's successor. */
		cluster_set_next_flag(&list->head, cluster_next(&ci[idx]), 0);
	}

	return idx;
}
/* Add a cluster to discard list and schedule it to do discard */ /* Add a cluster to discard list and schedule it to do discard */
static void swap_cluster_schedule_discard(struct swap_info_struct *si, static void swap_cluster_schedule_discard(struct swap_info_struct *si,
struct swap_cluster_info *ci) unsigned int idx)
{ {
unsigned int idx = ci - si->cluster_info;
/* /*
* If scan_swap_map_slots() can't find a free cluster, it will check * If scan_swap_map_slots() can't find a free cluster, it will check
* si->swap_map directly. To make sure the discarding cluster isn't * si->swap_map directly. To make sure the discarding cluster isn't
@ -354,14 +461,17 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
memset(si->swap_map + idx * SWAPFILE_CLUSTER, memset(si->swap_map + idx * SWAPFILE_CLUSTER,
SWAP_MAP_BAD, SWAPFILE_CLUSTER); SWAP_MAP_BAD, SWAPFILE_CLUSTER);
list_add_tail(&ci->list, &si->discard_clusters); cluster_list_add_tail(&si->discard_clusters, si->cluster_info, idx);
schedule_work(&si->discard_work); schedule_work(&si->discard_work);
} }
static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci) static void __free_cluster(struct swap_info_struct *si, unsigned long idx)
{ {
ci->state = CLUSTER_STATE_FREE; struct swap_cluster_info *ci = si->cluster_info;
list_add_tail(&ci->list, &si->free_clusters);
cluster_set_flag(ci + idx, CLUSTER_FLAG_FREE);
cluster_list_add_tail(&si->free_clusters, ci, idx);
} }
/* /*
@ -370,22 +480,21 @@ static void __free_cluster(struct swap_info_struct *si, struct swap_cluster_info
*/ */
static void swap_do_scheduled_discard(struct swap_info_struct *si) static void swap_do_scheduled_discard(struct swap_info_struct *si)
{ {
struct swap_cluster_info *ci; struct swap_cluster_info *info, *ci;
unsigned int idx; unsigned int idx;
while (!list_empty(&si->discard_clusters)) { info = si->cluster_info;
ci = list_first_entry(&si->discard_clusters, struct swap_cluster_info, list);
list_del(&ci->list); while (!cluster_list_empty(&si->discard_clusters)) {
idx = ci - si->cluster_info; idx = cluster_list_del_first(&si->discard_clusters, info);
spin_unlock(&si->lock); spin_unlock(&si->lock);
discard_swap_cluster(si, idx * SWAPFILE_CLUSTER, discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
SWAPFILE_CLUSTER); SWAPFILE_CLUSTER);
spin_lock(&si->lock); spin_lock(&si->lock);
ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
spin_lock(&ci->lock); __free_cluster(si, idx);
__free_cluster(si, ci);
memset(si->swap_map + idx * SWAPFILE_CLUSTER, memset(si->swap_map + idx * SWAPFILE_CLUSTER,
0, SWAPFILE_CLUSTER); 0, SWAPFILE_CLUSTER);
unlock_cluster(ci); unlock_cluster(ci);
@ -411,19 +520,20 @@ static void swap_users_ref_free(struct percpu_ref *ref)
complete(&si->comp); complete(&si->comp);
} }
static struct swap_cluster_info *alloc_cluster(struct swap_info_struct *si, unsigned long idx) static void alloc_cluster(struct swap_info_struct *si, unsigned long idx)
{ {
struct swap_cluster_info *ci = list_first_entry(&si->free_clusters, struct swap_cluster_info, list); struct swap_cluster_info *ci = si->cluster_info;
VM_BUG_ON(ci - si->cluster_info != idx); VM_BUG_ON(cluster_list_first(&si->free_clusters) != idx);
list_del(&ci->list); cluster_list_del_first(&si->free_clusters, ci);
ci->count = 0; cluster_set_count_flag(ci + idx, 0, 0);
return ci;
} }
static void free_cluster(struct swap_info_struct *si, struct swap_cluster_info *ci) static void free_cluster(struct swap_info_struct *si, unsigned long idx)
{ {
VM_BUG_ON(ci->count != 0); struct swap_cluster_info *ci = si->cluster_info + idx;
VM_BUG_ON(cluster_count(ci) != 0);
/* /*
* If the swap is discardable, prepare discard the cluster * If the swap is discardable, prepare discard the cluster
* instead of free it immediately. The cluster will be freed * instead of free it immediately. The cluster will be freed
@ -431,11 +541,11 @@ static void free_cluster(struct swap_info_struct *si, struct swap_cluster_info *
*/ */
if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) == if ((si->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
(SWP_WRITEOK | SWP_PAGE_DISCARD)) { (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
swap_cluster_schedule_discard(si, ci); swap_cluster_schedule_discard(si, idx);
return; return;
} }
__free_cluster(si, ci); __free_cluster(si, idx);
} }
/* /*
@ -448,15 +558,15 @@ static void add_cluster_info_page(struct swap_info_struct *p,
unsigned long count) unsigned long count)
{ {
unsigned long idx = page_nr / SWAPFILE_CLUSTER; unsigned long idx = page_nr / SWAPFILE_CLUSTER;
struct swap_cluster_info *ci = cluster_info + idx;
if (!cluster_info) if (!cluster_info)
return; return;
if (cluster_is_free(ci)) if (cluster_is_free(&cluster_info[idx]))
alloc_cluster(p, idx); alloc_cluster(p, idx);
VM_BUG_ON(ci->count + count > SWAPFILE_CLUSTER); VM_BUG_ON(cluster_count(&cluster_info[idx]) + count > SWAPFILE_CLUSTER);
ci->count += count; cluster_set_count(&cluster_info[idx],
cluster_count(&cluster_info[idx]) + count);
} }
/* /*
@ -470,20 +580,24 @@ static void inc_cluster_info_page(struct swap_info_struct *p,
} }
/* /*
* The cluster ci decreases one usage. If the usage counter becomes 0, * The cluster corresponding to page_nr decreases one usage. If the usage
* which means no page in the cluster is in using, we can optionally discard * counter becomes 0, which means no page in the cluster is in using, we can
* the cluster and add it to free cluster list. * optionally discard the cluster and add it to free cluster list.
*/ */
static void dec_cluster_info_page(struct swap_info_struct *p, struct swap_cluster_info *ci) static void dec_cluster_info_page(struct swap_info_struct *p,
struct swap_cluster_info *cluster_info, unsigned long page_nr)
{ {
if (!p->cluster_info) unsigned long idx = page_nr / SWAPFILE_CLUSTER;
if (!cluster_info)
return; return;
VM_BUG_ON(ci->count == 0); VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
ci->count--; cluster_set_count(&cluster_info[idx],
cluster_count(&cluster_info[idx]) - 1);
if (!ci->count) if (cluster_count(&cluster_info[idx]) == 0)
free_cluster(p, ci); free_cluster(p, idx);
} }
/* /*
@ -496,10 +610,10 @@ scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
{ {
struct percpu_cluster *percpu_cluster; struct percpu_cluster *percpu_cluster;
bool conflict; bool conflict;
struct swap_cluster_info *first = list_first_entry(&si->free_clusters, struct swap_cluster_info, list);
offset /= SWAPFILE_CLUSTER; offset /= SWAPFILE_CLUSTER;
conflict = !list_empty(&si->free_clusters) && conflict = !cluster_list_empty(&si->free_clusters) &&
offset != first - si->cluster_info && offset != cluster_list_first(&si->free_clusters) &&
cluster_is_free(&si->cluster_info[offset]); cluster_is_free(&si->cluster_info[offset]);
if (!conflict) if (!conflict)
@ -540,14 +654,10 @@ new_cluster:
cluster = this_cpu_ptr(si->percpu_cluster); cluster = this_cpu_ptr(si->percpu_cluster);
tmp = cluster->next[order]; tmp = cluster->next[order];
if (tmp == SWAP_NEXT_INVALID) { if (tmp == SWAP_NEXT_INVALID) {
if (!list_empty(&si->free_clusters)) { if (!cluster_list_empty(&si->free_clusters)) {
ci = list_first_entry(&si->free_clusters, struct swap_cluster_info, list); tmp = cluster_next(&si->free_clusters.head) *
list_del(&ci->list); SWAPFILE_CLUSTER;
spin_lock(&ci->lock); } else if (!cluster_list_empty(&si->discard_clusters)) {
ci->state = CLUSTER_STATE_PER_CPU;
spin_unlock(&ci->lock);
tmp = (ci - si->cluster_info) * SWAPFILE_CLUSTER;
} else if (!list_empty(&si->discard_clusters)) {
/* /*
* we don't have free cluster but have some clusters in * we don't have free cluster but have some clusters in
* discarding, do discard now and reclaim them, then * discarding, do discard now and reclaim them, then
@ -946,8 +1056,8 @@ static void swap_free_cluster(struct swap_info_struct *si, unsigned long idx)
ci = lock_cluster(si, offset); ci = lock_cluster(si, offset);
memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER); memset(si->swap_map + offset, 0, SWAPFILE_CLUSTER);
ci->count = 0; cluster_set_count_flag(ci, 0, 0);
free_cluster(si, ci); free_cluster(si, idx);
unlock_cluster(ci); unlock_cluster(ci);
swap_range_free(si, offset, SWAPFILE_CLUSTER); swap_range_free(si, offset, SWAPFILE_CLUSTER);
} }
@ -1221,7 +1331,7 @@ static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
count = p->swap_map[offset]; count = p->swap_map[offset];
VM_BUG_ON(count != SWAP_HAS_CACHE); VM_BUG_ON(count != SWAP_HAS_CACHE);
p->swap_map[offset] = 0; p->swap_map[offset] = 0;
dec_cluster_info_page(p, ci); dec_cluster_info_page(p, p->cluster_info, offset);
unlock_cluster(ci); unlock_cluster(ci);
mem_cgroup_uncharge_swap(entry, 1); mem_cgroup_uncharge_swap(entry, 1);
@ -2898,8 +3008,8 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
nr_good_pages = maxpages - 1; /* omit header page */ nr_good_pages = maxpages - 1; /* omit header page */
INIT_LIST_HEAD(&p->free_clusters); cluster_list_init(&p->free_clusters);
INIT_LIST_HEAD(&p->discard_clusters); cluster_list_init(&p->discard_clusters);
for (i = 0; i < swap_header->info.nr_badpages; i++) { for (i = 0; i < swap_header->info.nr_badpages; i++) {
unsigned int page_nr = swap_header->info.badpages[i]; unsigned int page_nr = swap_header->info.badpages[i];
@ -2950,15 +3060,14 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
for (k = 0; k < SWAP_CLUSTER_COLS; k++) { for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
j = (k + col) % SWAP_CLUSTER_COLS; j = (k + col) % SWAP_CLUSTER_COLS;
for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) { for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
struct swap_cluster_info *ci;
idx = i * SWAP_CLUSTER_COLS + j; idx = i * SWAP_CLUSTER_COLS + j;
ci = cluster_info + idx;
if (idx >= nr_clusters) if (idx >= nr_clusters)
continue; continue;
if (ci->count) if (cluster_count(&cluster_info[idx]))
continue; continue;
ci->state = CLUSTER_STATE_FREE; cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
list_add_tail(&ci->list, &p->free_clusters); cluster_list_add_tail(&p->free_clusters, cluster_info,
idx);
} }
} }
return nr_extents; return nr_extents;