Skip to content

Commit 9ea459e

Browse files
author
Al Viro
committed
delayed mntput
On final mntput() we want fs shutdown to happen before return to userland; however, the only case where we want it to happen right there (i.e. where task_work_add won't do) is an MNT_INTERNAL victim. Those have to be fully synchronous - a failure halfway through module init might count on having the vfsmount killed right there. Fortunately, final mntput on MNT_INTERNAL vfsmounts happens on a shallow stack. So we handle those synchronously and do an analog of the delayed fput logic for everything else. As a result, we are guaranteed that fs shutdown will always happen on a shallow stack. Signed-off-by: Al Viro <[email protected]>
1 parent b3ca406 commit 9ea459e

2 files changed

Lines changed: 57 additions & 19 deletions

File tree

fs/mount.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,10 @@ struct mount {
2929
struct mount *mnt_parent;
3030
struct dentry *mnt_mountpoint;
3131
struct vfsmount mnt;
32-
struct rcu_head mnt_rcu;
32+
union {
33+
struct rcu_head mnt_rcu;
34+
struct llist_node mnt_llist;
35+
};
3336
#ifdef CONFIG_SMP
3437
struct mnt_pcp __percpu *mnt_pcp;
3538
#else

fs/namespace.c

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <linux/proc_ns.h>
2424
#include <linux/magic.h>
2525
#include <linux/bootmem.h>
26+
#include <linux/task_work.h>
2627
#include "pnode.h"
2728
#include "internal.h"
2829

@@ -957,6 +958,46 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
957958
return ERR_PTR(err);
958959
}
959960

961+
static void cleanup_mnt(struct mount *mnt)
962+
{
963+
/*
964+
* This probably indicates that somebody messed
965+
* up a mnt_want/drop_write() pair. If this
966+
* happens, the filesystem was probably unable
967+
* to make r/w->r/o transitions.
968+
*/
969+
/*
970+
* The locking used to deal with mnt_count decrement provides barriers,
971+
* so mnt_get_writers() below is safe.
972+
*/
973+
WARN_ON(mnt_get_writers(mnt));
974+
if (unlikely(mnt->mnt_pins.first))
975+
mnt_pin_kill(mnt);
976+
fsnotify_vfsmount_delete(&mnt->mnt);
977+
dput(mnt->mnt.mnt_root);
978+
deactivate_super(mnt->mnt.mnt_sb);
979+
mnt_free_id(mnt);
980+
call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
981+
}
982+
983+
static void __cleanup_mnt(struct rcu_head *head)
984+
{
985+
cleanup_mnt(container_of(head, struct mount, mnt_rcu));
986+
}
987+
988+
static LLIST_HEAD(delayed_mntput_list);
989+
static void delayed_mntput(struct work_struct *unused)
990+
{
991+
struct llist_node *node = llist_del_all(&delayed_mntput_list);
992+
struct llist_node *next;
993+
994+
for (; node; node = next) {
995+
next = llist_next(node);
996+
cleanup_mnt(llist_entry(node, struct mount, mnt_llist));
997+
}
998+
}
999+
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
1000+
9601001
static void mntput_no_expire(struct mount *mnt)
9611002
{
9621003
rcu_read_lock();
@@ -982,24 +1023,18 @@ static void mntput_no_expire(struct mount *mnt)
9821023
list_del(&mnt->mnt_instance);
9831024
unlock_mount_hash();
9841025

985-
/*
986-
* This probably indicates that somebody messed
987-
* up a mnt_want/drop_write() pair. If this
988-
* happens, the filesystem was probably unable
989-
* to make r/w->r/o transitions.
990-
*/
991-
/*
992-
* The locking used to deal with mnt_count decrement provides barriers,
993-
* so mnt_get_writers() below is safe.
994-
*/
995-
WARN_ON(mnt_get_writers(mnt));
996-
if (unlikely(mnt->mnt_pins.first))
997-
mnt_pin_kill(mnt);
998-
fsnotify_vfsmount_delete(&mnt->mnt);
999-
dput(mnt->mnt.mnt_root);
1000-
deactivate_super(mnt->mnt.mnt_sb);
1001-
mnt_free_id(mnt);
1002-
call_rcu(&mnt->mnt_rcu, delayed_free_vfsmnt);
1026+
if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) {
1027+
struct task_struct *task = current;
1028+
if (likely(!(task->flags & PF_KTHREAD))) {
1029+
init_task_work(&mnt->mnt_rcu, __cleanup_mnt);
1030+
if (!task_work_add(task, &mnt->mnt_rcu, true))
1031+
return;
1032+
}
1033+
if (llist_add(&mnt->mnt_llist, &delayed_mntput_list))
1034+
schedule_delayed_work(&delayed_mntput_work, 1);
1035+
return;
1036+
}
1037+
cleanup_mnt(mnt);
10031038
}
10041039

10051040
void mntput(struct vfsmount *mnt)

0 commit comments

Comments
 (0)