
Commit 701d678

HenryBurns authored and torvalds committed
mm/zsmalloc.c: fix race condition in zs_destroy_pool
In zs_destroy_pool() we call flush_work(&pool->free_work). However, we have no guarantee that migration isn't happening in the background at that time.

Since migration can't directly free pages, it relies on free_work being scheduled to free the pages. But there's nothing preventing an in-progress migrate from queuing the work *after* zs_unregister_migration() has called flush_work(). Which would mean pages still pointing at the inode when we free it.

Since we know at destroy time all objects should be free, no new migrations can come in (since zs_page_isolate() fails for fully-free zspages). This means it is sufficient to track a "# isolated zspages" count by class, and have the destroy logic ensure all such pages have drained before proceeding. Keeping that state under the class spinlock keeps the logic straightforward.

In this case a memory leak could lead to an eventual crash if compaction hits the leaked page. This crash would only occur if people are changing their zswap backend at runtime (which eventually starts destruction).

Link: http://lkml.kernel.org/r/[email protected]
Fixes: 48b4800 ("zsmalloc: page migration support")
Signed-off-by: Henry Burns <[email protected]>
Reviewed-by: Sergey Senozhatsky <[email protected]>
Cc: Henry Burns <[email protected]>
Cc: Minchan Kim <[email protected]>
Cc: Shakeel Butt <[email protected]>
Cc: Jonathan Adams <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
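The fix is a "drain in-flight migrations before teardown" pattern: zs_page_isolate() bumps a per-pool isolated-page counter, migration completion decrements it, and the destroy path sets pool->destroying, issues a memory barrier, then sleeps on a wait queue until the counter reaches zero before flushing free_work and dropping the inode. As a rough illustration only, the sketch below is a userspace analogue of that pattern using pthreads; the struct and function names (pool_inc_isolated() and friends) are invented for the sketch, and a mutex/condvar stands in for the kernel's atomic counter, wait queue, and smp_mb().

/*
 * Userspace sketch (NOT the kernel code) of the drain-before-teardown
 * pattern this commit adds to zsmalloc.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct pool {
	pthread_mutex_t lock;
	pthread_cond_t drained;		/* plays the role of pool->migration_wait */
	long isolated;			/* plays the role of pool->isolated_pages */
	bool destroying;		/* plays the role of pool->destroying */
};

/* Isolation path: a page was isolated for migration. */
static void pool_inc_isolated(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	p->isolated++;
	pthread_mutex_unlock(&p->lock);
}

/* Migration finished (or was aborted): drop the count, wake the destroyer. */
static void pool_dec_isolated(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	p->isolated--;
	/* Only the destroy path sleeps, so only signal when it might be waiting. */
	if (p->isolated == 0 && p->destroying)
		pthread_cond_broadcast(&p->drained);
	pthread_mutex_unlock(&p->lock);
}

/* Destroy path: announce destruction, then wait for in-flight work to drain. */
static void pool_wait_for_drain(struct pool *p)
{
	pthread_mutex_lock(&p->lock);
	p->destroying = true;
	while (p->isolated != 0)
		pthread_cond_wait(&p->drained, &p->lock);
	pthread_mutex_unlock(&p->lock);
	/* Only now is it safe to flush deferred frees and release resources. */
}

int main(void)
{
	struct pool p = {
		.lock = PTHREAD_MUTEX_INITIALIZER,
		.drained = PTHREAD_COND_INITIALIZER,
	};

	pool_inc_isolated(&p);		/* a migration starts... */
	pool_dec_isolated(&p);		/* ...and completes */
	pool_wait_for_drain(&p);	/* destroy path returns immediately */
	puts("drained");
	return 0;
}

The real change avoids holding a lock around the counter: pool->isolated_pages is an atomic_long_t, and per the commit's own comment the smp_mb() in zs_unregister_migration() ensures global visibility of pool->destroying, so the count is either already 0 or the last decrement will see destroying and wake the waiter.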
1 parent 1a87aa0 commit 701d678

File tree

1 file changed: 59 additions, 2 deletions

mm/zsmalloc.c

Lines changed: 59 additions & 2 deletions
@@ -54,6 +54,7 @@
 #include <linux/mount.h>
 #include <linux/pseudo_fs.h>
 #include <linux/migrate.h>
+#include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>

@@ -268,6 +269,10 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
 	struct inode *inode;
 	struct work_struct free_work;
+	/* A wait queue for when migration races with async_free_zspage() */
+	struct wait_queue_head migration_wait;
+	atomic_long_t isolated_pages;
+	bool destroying;
 #endif
 };

@@ -1874,6 +1879,19 @@ static void putback_zspage_deferred(struct zs_pool *pool,
 
 }
 
+static inline void zs_pool_dec_isolated(struct zs_pool *pool)
+{
+	VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
+	atomic_long_dec(&pool->isolated_pages);
+	/*
+	 * There's no possibility of racing, since wait_for_isolated_drain()
+	 * checks the isolated count under &class->lock after enqueuing
+	 * on migration_wait.
+	 */
+	if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
+		wake_up_all(&pool->migration_wait);
+}
+
 static void replace_sub_page(struct size_class *class, struct zspage *zspage,
 				struct page *newpage, struct page *oldpage)
 {
@@ -1943,6 +1961,7 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
 	 */
 	if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
 		get_zspage_mapping(zspage, &class_idx, &fullness);
+		atomic_long_inc(&pool->isolated_pages);
 		remove_zspage(class, zspage, fullness);
 	}

@@ -2042,8 +2061,16 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
 	 * Page migration is done so let's putback isolated zspage to
 	 * the list if @page is final isolated subpage in the zspage.
 	 */
-	if (!is_zspage_isolated(zspage))
+	if (!is_zspage_isolated(zspage)) {
+		/*
+		 * We cannot race with zs_destroy_pool() here because we wait
+		 * for isolation to hit zero before we start destroying.
+		 * Also, we ensure that everyone can see pool->destroying before
+		 * we start waiting.
+		 */
 		putback_zspage_deferred(pool, class, zspage);
+		zs_pool_dec_isolated(pool);
+	}
 
 	reset_page(page);
 	put_page(page);
@@ -2094,8 +2121,8 @@ static void zs_page_putback(struct page *page)
 		 * so let's defer.
 		 */
 		putback_zspage_deferred(pool, class, zspage);
+		zs_pool_dec_isolated(pool);
 	}
-
 	spin_unlock(&class->lock);
 }

@@ -2118,8 +2145,36 @@ static int zs_register_migration(struct zs_pool *pool)
 	return 0;
 }
 
+static bool pool_isolated_are_drained(struct zs_pool *pool)
+{
+	return atomic_long_read(&pool->isolated_pages) == 0;
+}
+
+/* Function for resolving migration */
+static void wait_for_isolated_drain(struct zs_pool *pool)
+{
+
+	/*
+	 * We're in the process of destroying the pool, so there are no
+	 * active allocations. zs_page_isolate() fails for completely free
+	 * zspages, so we need only wait for the zs_pool's isolated
+	 * count to hit zero.
+	 */
+	wait_event(pool->migration_wait,
+		   pool_isolated_are_drained(pool));
+}
+
 static void zs_unregister_migration(struct zs_pool *pool)
 {
+	pool->destroying = true;
+	/*
+	 * We need a memory barrier here to ensure global visibility of
+	 * pool->destroying. Thus pool->isolated pages will either be 0 in which
+	 * case we don't care, or it will be > 0 and pool->destroying will
+	 * ensure that we wake up once isolation hits 0.
+	 */
+	smp_mb();
+	wait_for_isolated_drain(pool); /* This can block */
 	flush_work(&pool->free_work);
 	iput(pool->inode);
 }
@@ -2357,6 +2412,8 @@ struct zs_pool *zs_create_pool(const char *name)
 	if (!pool->name)
 		goto err;
 
+	init_waitqueue_head(&pool->migration_wait);
+
 	if (create_cache(pool))
 		goto err;
