Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit f95bdb7

Browse files
Qi Zheng authored and akpm00 committed
mm: vmscan: make global slab shrink lockless
The shrinker_rwsem is a global read-write lock in the shrinker subsystem, which protects most operations such as slab shrink, registration and unregistration of shrinkers, etc. This can easily cause problems in the following cases. 1) When the memory pressure is high and there are many filesystems mounted or unmounted at the same time, slab shrink will be affected (down_read_trylock() fails). One example is the real workload described by Kirill Tkhai: ``` One of the real workloads from my experience is start of an overcommitted node containing many starting containers after node crash (or many resuming containers after reboot for kernel update). In these cases memory pressure is huge, and the node goes round in long reclaim. ``` 2) If a shrinker is blocked (such as the case mentioned in [1]) and a writer comes in (such as mounting a filesystem), then this writer will be blocked and cause all subsequent shrinker-related operations to be blocked. Even if there is no competitor when shrinking slab, there may still be a problem. If we have a long shrinker list and we do not reclaim enough memory with each shrinker, then down_read_trylock() may be called with high frequency. Because of the poor multicore scalability of atomic operations, this can lead to a significant drop in IPC (instructions per cycle). Several times in the past ([2], [3], [4], [5]), people have proposed replacing the shrinker_rwsem trylock with SRCU in the slab shrink, but all of these patches were abandoned because SRCU was not unconditionally enabled. Now, since commit 1cd0bd0 ("rcu: Remove CONFIG_SRCU"), SRCU is unconditionally enabled, so it's time to use SRCU to protect readers who previously held shrinker_rwsem. This commit uses SRCU to make global slab shrink lockless; the memcg slab shrink is handled in a subsequent patch. [1]. https://lore.kernel.org/lkml/[email protected]/ [2]. https://lore.kernel.org/all/[email protected]/ [3]. 
https://lore.kernel.org/lkml/1510609063-3327-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp/ [4]. https://lore.kernel.org/lkml/153365347929.19074.12509495712735843805.stgit@localhost.localdomain/ [5]. https://lore.kernel.org/lkml/[email protected]/ Link: https://lkml.kernel.org/r/[email protected] Signed-off-by: Qi Zheng <[email protected]> Acked-by: Vlastimil Babka <[email protected]> Acked-by: Kirill Tkhai <[email protected]> Acked-by: Roman Gushchin <[email protected]> Cc: Christian König <[email protected]> Cc: David Hildenbrand <[email protected]> Cc: Davidlohr Bueso <[email protected]> Cc: Johannes Weiner <[email protected]> Cc: Michal Hocko <[email protected]> Cc: Muchun Song <[email protected]> Cc: Paul E. McKenney <[email protected]> Cc: Shakeel Butt <[email protected]> Cc: Yang Shi <[email protected]> Signed-off-by: Andrew Morton <[email protected]>
1 parent 42c9db3 commit f95bdb7

File tree

1 file changed

+12
-16
lines changed

1 file changed

+12
-16
lines changed

mm/vmscan.c

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@
5757
#include <linux/khugepaged.h>
5858
#include <linux/rculist_nulls.h>
5959
#include <linux/random.h>
60+
#include <linux/srcu.h>
6061

6162
#include <asm/tlbflush.h>
6263
#include <asm/div64.h>
@@ -202,6 +203,7 @@ static void set_task_reclaim_state(struct task_struct *task,
202203

203204
LIST_HEAD(shrinker_list);
204205
DECLARE_RWSEM(shrinker_rwsem);
206+
DEFINE_SRCU(shrinker_srcu);
205207

206208
#ifdef CONFIG_MEMCG
207209
static int shrinker_nr_max;
@@ -700,7 +702,7 @@ void free_prealloced_shrinker(struct shrinker *shrinker)
700702
void register_shrinker_prepared(struct shrinker *shrinker)
701703
{
702704
down_write(&shrinker_rwsem);
703-
list_add_tail(&shrinker->list, &shrinker_list);
705+
list_add_tail_rcu(&shrinker->list, &shrinker_list);
704706
shrinker->flags |= SHRINKER_REGISTERED;
705707
shrinker_debugfs_add(shrinker);
706708
up_write(&shrinker_rwsem);
@@ -754,13 +756,15 @@ void unregister_shrinker(struct shrinker *shrinker)
754756
return;
755757

756758
down_write(&shrinker_rwsem);
757-
list_del(&shrinker->list);
759+
list_del_rcu(&shrinker->list);
758760
shrinker->flags &= ~SHRINKER_REGISTERED;
759761
if (shrinker->flags & SHRINKER_MEMCG_AWARE)
760762
unregister_memcg_shrinker(shrinker);
761763
debugfs_entry = shrinker_debugfs_remove(shrinker);
762764
up_write(&shrinker_rwsem);
763765

766+
synchronize_srcu(&shrinker_srcu);
767+
764768
debugfs_remove_recursive(debugfs_entry);
765769

766770
kfree(shrinker->nr_deferred);
@@ -780,6 +784,7 @@ void synchronize_shrinkers(void)
780784
{
781785
down_write(&shrinker_rwsem);
782786
up_write(&shrinker_rwsem);
787+
synchronize_srcu(&shrinker_srcu);
783788
}
784789
EXPORT_SYMBOL(synchronize_shrinkers);
785790

@@ -990,6 +995,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
990995
{
991996
unsigned long ret, freed = 0;
992997
struct shrinker *shrinker;
998+
int srcu_idx;
993999

9941000
/*
9951001
* The root memcg might be allocated even though memcg is disabled
@@ -1001,10 +1007,10 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
10011007
if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg))
10021008
return shrink_slab_memcg(gfp_mask, nid, memcg, priority);
10031009

1004-
if (!down_read_trylock(&shrinker_rwsem))
1005-
goto out;
1010+
srcu_idx = srcu_read_lock(&shrinker_srcu);
10061011

1007-
list_for_each_entry(shrinker, &shrinker_list, list) {
1012+
list_for_each_entry_srcu(shrinker, &shrinker_list, list,
1013+
srcu_read_lock_held(&shrinker_srcu)) {
10081014
struct shrink_control sc = {
10091015
.gfp_mask = gfp_mask,
10101016
.nid = nid,
@@ -1015,19 +1021,9 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid,
10151021
if (ret == SHRINK_EMPTY)
10161022
ret = 0;
10171023
freed += ret;
1018-
/*
1019-
* Bail out if someone want to register a new shrinker to
1020-
* prevent the registration from being stalled for long periods
1021-
* by parallel ongoing shrinking.
1022-
*/
1023-
if (rwsem_is_contended(&shrinker_rwsem)) {
1024-
freed = freed ? : 1;
1025-
break;
1026-
}
10271024
}
10281025

1029-
up_read(&shrinker_rwsem);
1030-
out:
1026+
srcu_read_unlock(&shrinker_srcu, srcu_idx);
10311027
cond_resched();
10321028
return freed;
10331029
}

0 commit comments

Comments
 (0)