vfs: syscall: Add open_tree(2) to reference or clone a mount

open_tree(dfd, pathname, flags)

Returns an O_PATH-opened file descriptor or an error.
dfd and pathname specify the location to open, in usual
fashion (see e.g. fstatat(2)).  flags should be an OR of
some of the following:
	* AT_PATH_EMPTY, AT_NO_AUTOMOUNT, AT_SYMLINK_NOFOLLOW -
same meanings as usual
	* OPEN_TREE_CLOEXEC - make the resulting descriptor
close-on-exec
	* OPEN_TREE_CLONE or OPEN_TREE_CLONE | AT_RECURSIVE -
instead of opening the location in question, create a detached
mount tree matching the subtree rooted at location specified by
dfd/pathname.  With AT_RECURSIVE the entire subtree is cloned,
without it - only the part within in the mount containing the
location in question.  In other words, the same as mount --rbind
or mount --bind would've taken.  The detached tree will be
dissolved on the final close of obtained file.  Creation of such
detached trees requires the same capabilities as doing mount --bind.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
cc: linux-api@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
This commit is contained in:
Al Viro
2018-11-05 17:40:30 +00:00
parent 9e98c678c2
commit a07b200047
9 changed files with 159 additions and 28 deletions

View File

@@ -20,6 +20,7 @@
#include <linux/init.h> /* init_rootfs */
#include <linux/fs_struct.h> /* get_fs_root et.al. */
#include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */
#include <linux/file.h>
#include <linux/uaccess.h>
#include <linux/proc_ns.h>
#include <linux/magic.h>
@@ -1832,6 +1833,21 @@ struct vfsmount *collect_mounts(const struct path *path)
return &tree->mnt;
}
static void free_mnt_ns(struct mnt_namespace *);
static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *, bool);
void dissolve_on_fput(struct vfsmount *mnt)
{
struct mnt_namespace *ns;
namespace_lock();
lock_mount_hash();
ns = real_mount(mnt)->mnt_ns;
umount_tree(real_mount(mnt), UMOUNT_CONNECTED);
unlock_mount_hash();
namespace_unlock();
free_mnt_ns(ns);
}
void drop_collected_mounts(struct vfsmount *mnt)
{
namespace_lock();
@@ -2222,6 +2238,30 @@ static bool has_locked_children(struct mount *mnt, struct dentry *dentry)
return false;
}
static struct mount *__do_loopback(struct path *old_path, int recurse)
{
struct mount *mnt = ERR_PTR(-EINVAL), *old = real_mount(old_path->mnt);
if (IS_MNT_UNBINDABLE(old))
return mnt;
if (!check_mnt(old) && old_path->dentry->d_op != &ns_dentry_operations)
return mnt;
if (!recurse && has_locked_children(old, old_path->dentry))
return mnt;
if (recurse)
mnt = copy_tree(old, old_path->dentry, CL_COPY_MNT_NS_FILE);
else
mnt = clone_mnt(old, old_path->dentry, 0);
if (!IS_ERR(mnt))
mnt->mnt.mnt_flags &= ~MNT_LOCKED;
return mnt;
}
/*
* do loopback mount.
*/
@@ -2229,7 +2269,7 @@ static int do_loopback(struct path *path, const char *old_name,
int recurse)
{
struct path old_path;
struct mount *mnt = NULL, *old, *parent;
struct mount *mnt = NULL, *parent;
struct mountpoint *mp;
int err;
if (!old_name || !*old_name)
@@ -2243,38 +2283,21 @@ static int do_loopback(struct path *path, const char *old_name,
goto out;
mp = lock_mount(path);
err = PTR_ERR(mp);
if (IS_ERR(mp))
if (IS_ERR(mp)) {
err = PTR_ERR(mp);
goto out;
}
old = real_mount(old_path.mnt);
parent = real_mount(path->mnt);
err = -EINVAL;
if (IS_MNT_UNBINDABLE(old))
goto out2;
if (!check_mnt(parent))
goto out2;
if (!check_mnt(old) && old_path.dentry->d_op != &ns_dentry_operations)
goto out2;
if (!recurse && has_locked_children(old, old_path.dentry))
goto out2;
if (recurse)
mnt = copy_tree(old, old_path.dentry, CL_COPY_MNT_NS_FILE);
else
mnt = clone_mnt(old, old_path.dentry, 0);
mnt = __do_loopback(&old_path, recurse);
if (IS_ERR(mnt)) {
err = PTR_ERR(mnt);
goto out2;
}
mnt->mnt.mnt_flags &= ~MNT_LOCKED;
err = graft_tree(mnt, parent, mp);
if (err) {
lock_mount_hash();
@@ -2288,6 +2311,96 @@ out:
return err;
}
static struct file *open_detached_copy(struct path *path, bool recursive)
{
struct user_namespace *user_ns = current->nsproxy->mnt_ns->user_ns;
struct mnt_namespace *ns = alloc_mnt_ns(user_ns, true);
struct mount *mnt, *p;
struct file *file;
if (IS_ERR(ns))
return ERR_CAST(ns);
namespace_lock();
mnt = __do_loopback(path, recursive);
if (IS_ERR(mnt)) {
namespace_unlock();
free_mnt_ns(ns);
return ERR_CAST(mnt);
}
lock_mount_hash();
for (p = mnt; p; p = next_mnt(p, mnt)) {
p->mnt_ns = ns;
ns->mounts++;
}
ns->root = mnt;
list_add_tail(&ns->list, &mnt->mnt_list);
mntget(&mnt->mnt);
unlock_mount_hash();
namespace_unlock();
mntput(path->mnt);
path->mnt = &mnt->mnt;
file = dentry_open(path, O_PATH, current_cred());
if (IS_ERR(file))
dissolve_on_fput(path->mnt);
else
file->f_mode |= FMODE_NEED_UNMOUNT;
return file;
}
SYSCALL_DEFINE3(open_tree, int, dfd, const char *, filename, unsigned, flags)
{
struct file *file;
struct path path;
int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
bool detached = flags & OPEN_TREE_CLONE;
int error;
int fd;
BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC);
if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE |
AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE |
OPEN_TREE_CLOEXEC))
return -EINVAL;
if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE)
return -EINVAL;
if (flags & AT_NO_AUTOMOUNT)
lookup_flags &= ~LOOKUP_AUTOMOUNT;
if (flags & AT_SYMLINK_NOFOLLOW)
lookup_flags &= ~LOOKUP_FOLLOW;
if (flags & AT_EMPTY_PATH)
lookup_flags |= LOOKUP_EMPTY;
if (detached && !may_mount())
return -EPERM;
fd = get_unused_fd_flags(flags & O_CLOEXEC);
if (fd < 0)
return fd;
error = user_path_at(dfd, filename, lookup_flags, &path);
if (unlikely(error)) {
file = ERR_PTR(error);
} else {
if (detached)
file = open_detached_copy(&path, flags & AT_RECURSIVE);
else
file = dentry_open(&path, O_PATH, current_cred());
path_put(&path);
}
if (IS_ERR(file)) {
put_unused_fd(fd);
return PTR_ERR(file);
}
fd_install(fd, file);
return fd;
}
/*
* Don't allow locked mount flags to be cleared.
*