mm/mglru: fix ineffective protection calculation
commit 30d77b7eef upstream.

mem_cgroup_calculate_protection() is not stateless and should only be
used as part of a top-down tree traversal. shrink_one() traverses the
per-node memcg LRU instead of the root_mem_cgroup tree, and therefore
it should not call mem_cgroup_calculate_protection().

The existing misuse in shrink_one() can cause ineffective protection of
sub-trees that are grandchildren of root_mem_cgroup. Fix it by reusing
lru_gen_age_node(), which already traverses the root_mem_cgroup tree,
to calculate the protection.

Previously lru_gen_age_node() opportunistically skips the first pass,
i.e., when scan_control->priority is DEF_PRIORITY. On the second pass,
lruvec_is_sizable() uses appropriate scan_control->priority, set by
set_initial_priority() from lru_gen_shrink_node(), to decide whether a
memcg is too small to reclaim from.

Now lru_gen_age_node() unconditionally traverses the root_mem_cgroup
tree. So it should call set_initial_priority() upfront, to make sure
lruvec_is_sizable() uses appropriate scan_control->priority on the
first pass. Otherwise, lruvec_is_reclaimable() can return false
negatives and result in premature OOM kills when min_ttl_ms is used.

Link: https://lkml.kernel.org/r/20240712232956.1427127-1-yuzhao@google.com
Fixes: e4dde56cd2 ("mm: multi-gen LRU: per-node lru_gen_folio lists")
Signed-off-by: Yu Zhao <yuzhao@google.com>
Reported-by: T.J. Mercier <tjmercier@google.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
This commit is contained in:
parent be56dfc9be
commit 4211d065ef

mm/vmscan.c | 83
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -4546,6 +4546,32 @@ done:
  *                          working set protection
  ******************************************************************************/
 
+static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
+{
+	int priority;
+	unsigned long reclaimable;
+
+	if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
+		return;
+	/*
+	 * Determine the initial priority based on
+	 * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim,
+	 * where reclaimed_to_scanned_ratio = inactive / total.
+	 */
+	reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
+	if (can_reclaim_anon_pages(NULL, pgdat->node_id, sc))
+		reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
+
+	/* round down reclaimable and round up sc->nr_to_reclaim */
+	priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
+
+	/*
+	 * The estimation is based on LRU pages only, so cap it to prevent
+	 * overshoots of shrinker objects by large margins.
+	 */
+	sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY);
+}
+
 static bool lruvec_is_sizable(struct lruvec *lruvec, struct scan_control *sc)
 {
 	int gen, type, zone;
@@ -4579,19 +4605,17 @@ static bool lruvec_is_reclaimable(struct lruvec *lruvec, struct scan_control *sc
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	DEFINE_MIN_SEQ(lruvec);
 
-	/* see the comment on lru_gen_folio */
-	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
-	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
-
-	if (time_is_after_jiffies(birth + min_ttl))
+	if (mem_cgroup_below_min(NULL, memcg))
 		return false;
 
 	if (!lruvec_is_sizable(lruvec, sc))
 		return false;
 
-	mem_cgroup_calculate_protection(NULL, memcg);
+	/* see the comment on lru_gen_folio */
+	gen = lru_gen_from_seq(min_seq[LRU_GEN_FILE]);
+	birth = READ_ONCE(lruvec->lrugen.timestamps[gen]);
 
-	return !mem_cgroup_below_min(NULL, memcg);
+	return time_is_before_jiffies(birth + min_ttl);
 }
 
 /* to protect the working set of the last N jiffies */
@@ -4601,23 +4625,20 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 {
 	struct mem_cgroup *memcg;
 	unsigned long min_ttl = READ_ONCE(lru_gen_min_ttl);
+	bool reclaimable = !min_ttl;
 
 	VM_WARN_ON_ONCE(!current_is_kswapd());
 
-	/* check the order to exclude compaction-induced reclaim */
-	if (!min_ttl || sc->order || sc->priority == DEF_PRIORITY)
-		return;
+	set_initial_priority(pgdat, sc);
 
 	memcg = mem_cgroup_iter(NULL, NULL, NULL);
 	do {
 		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
 
-		if (lruvec_is_reclaimable(lruvec, sc, min_ttl)) {
-			mem_cgroup_iter_break(NULL, memcg);
-			return;
-		}
+		mem_cgroup_calculate_protection(NULL, memcg);
 
-		cond_resched();
+		if (!reclaimable)
+			reclaimable = lruvec_is_reclaimable(lruvec, sc, min_ttl);
 	} while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)));
 
 	/*
@@ -4625,7 +4646,7 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
 	 * younger than min_ttl. However, another possibility is all memcgs are
 	 * either too small or below min.
 	 */
-	if (mutex_trylock(&oom_lock)) {
+	if (!reclaimable && mutex_trylock(&oom_lock)) {
 		struct oom_control oc = {
 			.gfp_mask = sc->gfp_mask,
 		};
@@ -5424,8 +5445,7 @@ static int shrink_one(struct lruvec *lruvec, struct scan_control *sc)
 	struct mem_cgroup *memcg = lruvec_memcg(lruvec);
 	struct pglist_data *pgdat = lruvec_pgdat(lruvec);
 
-	mem_cgroup_calculate_protection(NULL, memcg);
-
+	/* lru_gen_age_node() called mem_cgroup_calculate_protection() */
 	if (mem_cgroup_below_min(NULL, memcg))
 		return MEMCG_LRU_YOUNG;
 
@@ -5565,33 +5585,6 @@ static void lru_gen_shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc
 
 #endif
 
-static void set_initial_priority(struct pglist_data *pgdat, struct scan_control *sc)
-{
-	int priority;
-	unsigned long reclaimable;
-	struct lruvec *lruvec = mem_cgroup_lruvec(NULL, pgdat);
-
-	if (sc->priority != DEF_PRIORITY || sc->nr_to_reclaim < MIN_LRU_BATCH)
-		return;
-	/*
-	 * Determine the initial priority based on
-	 * (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim,
-	 * where reclaimed_to_scanned_ratio = inactive / total.
-	 */
-	reclaimable = node_page_state(pgdat, NR_INACTIVE_FILE);
-	if (get_swappiness(lruvec, sc))
-		reclaimable += node_page_state(pgdat, NR_INACTIVE_ANON);
-
-	/* round down reclaimable and round up sc->nr_to_reclaim */
-	priority = fls_long(reclaimable) - 1 - fls_long(sc->nr_to_reclaim - 1);
-
-	/*
-	 * The estimation is based on LRU pages only, so cap it to prevent
-	 * overshoots of shrinker objects by large margins.
-	 */
-	sc->priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY);
-}
-
 static void lru_gen_shrink_node(struct pglist_data *pgdat, struct scan_control *sc)
 {
 	struct blk_plug plug;
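[Editor's note] The formula in the comment of set_initial_priority(), (total >> priority) * reclaimed_to_scanned_ratio = nr_to_reclaim with reclaimed_to_scanned_ratio = inactive / total, simplifies to inactive >> priority = nr_to_reclaim, i.e. priority ≈ log2(inactive) - log2(nr_to_reclaim), which the code computes with fls_long(). A minimal userspace sketch of that arithmetic follows; fls_long() is reimplemented here and the page counts are invented sample values.

/*
 * Userspace sketch of the priority derivation in set_initial_priority().
 * Only the arithmetic mirrors the patch; everything else is illustrative.
 */
#include <stdio.h>

#define DEF_PRIORITY	12	/* matches include/linux/mmzone.h */

/* find last (most significant) set bit, 1-based; 0 if no bit is set */
static int fls_long(unsigned long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

static int clamp(int v, int lo, int hi)
{
	return v < lo ? lo : (v > hi ? hi : v);
}

int main(void)
{
	unsigned long reclaimable = 1UL << 16;	/* sample: 64Ki inactive pages */
	unsigned long nr_to_reclaim = 64;	/* sample: one reclaim batch */
	int priority;

	/* round down reclaimable and round up nr_to_reclaim */
	priority = fls_long(reclaimable) - 1 - fls_long(nr_to_reclaim - 1);

	/* cap to [DEF_PRIORITY / 2, DEF_PRIORITY], as the patch does */
	priority = clamp(priority, DEF_PRIORITY / 2, DEF_PRIORITY);

	/* 16 - 6 = 10 here, so reclaim starts at priority 10, not 12 */
	printf("initial priority = %d\n", priority);
	return 0;
}

With these sample numbers the first pass starts at priority 10 instead of DEF_PRIORITY, so lruvec_is_sizable() already sees a realistic scan granularity on the now-unconditional first traversal, which is exactly why the patch calls set_initial_priority() upfront in lru_gen_age_node().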