summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/idmappings.rst72
-rw-r--r--fs/cachefiles/bind.c2
-rw-r--r--fs/ecryptfs/main.c2
-rw-r--r--fs/ksmbd/smbacl.c19
-rw-r--r--fs/ksmbd/smbacl.h5
-rw-r--r--fs/namespace.c53
-rw-r--r--fs/nfsd/export.c2
-rw-r--r--fs/open.c8
-rw-r--r--fs/overlayfs/super.c2
-rw-r--r--fs/posix_acl.c17
-rw-r--r--fs/proc_namespace.c2
-rw-r--r--fs/xfs/xfs_inode.c8
-rw-r--r--fs/xfs/xfs_linux.h1
-rw-r--r--fs/xfs/xfs_symlink.c4
-rw-r--r--include/linux/fs.h141
-rw-r--r--include/linux/mnt_idmapping.h234
-rw-r--r--security/commoncap.c15
17 files changed, 356 insertions, 231 deletions
diff --git a/Documentation/filesystems/idmappings.rst b/Documentation/filesystems/idmappings.rst
index 1229a75ec75d..7a879ec3b6bf 100644
--- a/Documentation/filesystems/idmappings.rst
+++ b/Documentation/filesystems/idmappings.rst
@@ -952,75 +952,3 @@ The raw userspace id that is put on disk is ``u1000`` so when the user takes
their home directory back to their home computer where they are assigned
``u1000`` using the initial idmapping and mount the filesystem with the initial
idmapping they will see all those files owned by ``u1000``.
-
-Shortcircuting
---------------
-
-Currently, the implementation of idmapped mounts enforces that the filesystem
-is mounted with the initial idmapping. The reason is simply that none of the
-filesystems that we targeted were mountable with a non-initial idmapping. But
-that might change soon enough. As we've seen above, thanks to the properties of
-idmappings the translation works for both filesystems mounted with the initial
-idmapping and filesystem with non-initial idmappings.
-
-Based on this current restriction to filesystem mounted with the initial
-idmapping two noticeable shortcuts have been taken:
-
-1. We always stash a reference to the initial user namespace in ``struct
- vfsmount``. Idmapped mounts are thus mounts that have a non-initial user
- namespace attached to them.
-
- In order to support idmapped mounts this needs to be changed. Instead of
- stashing the initial user namespace the user namespace the filesystem was
- mounted with must be stashed. An idmapped mount is then any mount that has
- a different user namespace attached then the filesystem was mounted with.
- This has no user-visible consequences.
-
-2. The translation algorithms in ``mapped_fs*id()`` and ``i_*id_into_mnt()``
- are simplified.
-
- Let's consider ``mapped_fs*id()`` first. This function translates the
- caller's kernel id into a kernel id in the filesystem's idmapping via
- a mount's idmapping. The full algorithm is::
-
- mapped_fsuid(kid):
- /* Map the kernel id up into a userspace id in the mount's idmapping. */
- from_kuid(mount-idmapping, kid) = uid
-
- /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
- make_kuid(filesystem-idmapping, uid) = kuid
-
- We know that the filesystem is always mounted with the initial idmapping as
- we enforce this in ``mount_setattr()``. So this can be shortened to::
-
- mapped_fsuid(kid):
- /* Map the kernel id up into a userspace id in the mount's idmapping. */
- from_kuid(mount-idmapping, kid) = uid
-
- /* Map the userspace id down into a kernel id in the filesystem's idmapping. */
- KUIDT_INIT(uid) = kuid
-
- Similarly, for ``i_*id_into_mnt()`` which translated the filesystem's kernel
- id into a mount's kernel id::
-
- i_uid_into_mnt(kid):
- /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
- from_kuid(filesystem-idmapping, kid) = uid
-
- /* Map the userspace id down into a kernel id in the mounts's idmapping. */
- make_kuid(mount-idmapping, uid) = kuid
-
- Again, we know that the filesystem is always mounted with the initial
- idmapping as we enforce this in ``mount_setattr()``. So this can be
- shortened to::
-
- i_uid_into_mnt(kid):
- /* Map the kernel id up into a userspace id in the filesystem's idmapping. */
- __kuid_val(kid) = uid
-
- /* Map the userspace id down into a kernel id in the mounts's idmapping. */
- make_kuid(mount-idmapping, uid) = kuid
-
-Handling filesystems mounted with non-initial idmappings requires that the
-translation functions be converted to their full form. They can still be
-shortcircuited on non-idmapped mounts. This has no user-visible consequences.
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index d463d89f5db8..146291be6263 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -117,7 +117,7 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
root = path.dentry;
ret = -EINVAL;
- if (mnt_user_ns(path.mnt) != &init_user_ns) {
+ if (is_idmapped_mnt(path.mnt)) {
pr_warn("File cache on idmapped mounts not supported");
goto error_unsupported;
}
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index d66bbd2df191..2dd23a82e0de 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -537,7 +537,7 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
goto out_free;
}
- if (mnt_user_ns(path.mnt) != &init_user_ns) {
+ if (is_idmapped_mnt(path.mnt)) {
rc = -EINVAL;
printk(KERN_ERR "Mounting on idmapped mounts currently disallowed\n");
goto out_free;
diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
index bd792db32623..6ecf55ea1fed 100644
--- a/fs/ksmbd/smbacl.c
+++ b/fs/ksmbd/smbacl.c
@@ -9,6 +9,7 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/mnt_idmapping.h>
#include "smbacl.h"
#include "smb_common.h"
@@ -274,14 +275,7 @@ static int sid_to_id(struct user_namespace *user_ns,
uid_t id;
id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
- /*
- * Translate raw sid into kuid in the server's user
- * namespace.
- */
- uid = make_kuid(&init_user_ns, id);
-
- /* If this is an idmapped mount, apply the idmapping. */
- uid = kuid_from_mnt(user_ns, uid);
+ uid = mapped_kuid_user(user_ns, &init_user_ns, KUIDT_INIT(id));
if (uid_valid(uid)) {
fattr->cf_uid = uid;
rc = 0;
@@ -291,14 +285,7 @@ static int sid_to_id(struct user_namespace *user_ns,
gid_t id;
id = le32_to_cpu(psid->sub_auth[psid->num_subauth - 1]);
- /*
- * Translate raw sid into kgid in the server's user
- * namespace.
- */
- gid = make_kgid(&init_user_ns, id);
-
- /* If this is an idmapped mount, apply the idmapping. */
- gid = kgid_from_mnt(user_ns, gid);
+ gid = mapped_kgid_user(user_ns, &init_user_ns, KGIDT_INIT(id));
if (gid_valid(gid)) {
fattr->cf_gid = gid;
rc = 0;
diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
index 73e08cad412b..811af3309429 100644
--- a/fs/ksmbd/smbacl.h
+++ b/fs/ksmbd/smbacl.h
@@ -11,6 +11,7 @@
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/posix_acl.h>
+#include <linux/mnt_idmapping.h>
#include "mgmt/tree_connect.h"
@@ -216,7 +217,7 @@ static inline uid_t posix_acl_uid_translate(struct user_namespace *mnt_userns,
kuid_t kuid;
/* If this is an idmapped mount, apply the idmapping. */
- kuid = kuid_into_mnt(mnt_userns, pace->e_uid);
+ kuid = mapped_kuid_fs(mnt_userns, &init_user_ns, pace->e_uid);
/* Translate the kuid into a userspace id ksmbd would see. */
return from_kuid(&init_user_ns, kuid);
@@ -228,7 +229,7 @@ static inline gid_t posix_acl_gid_translate(struct user_namespace *mnt_userns,
kgid_t kgid;
/* If this is an idmapped mount, apply the idmapping. */
- kgid = kgid_into_mnt(mnt_userns, pace->e_gid);
+ kgid = mapped_kgid_fs(mnt_userns, &init_user_ns, pace->e_gid);
/* Translate the kgid into a userspace id ksmbd would see. */
return from_kgid(&init_user_ns, kgid);
diff --git a/fs/namespace.c b/fs/namespace.c
index b696543adab8..dc31ad6b370f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -31,6 +31,7 @@
#include <uapi/linux/mount.h>
#include <linux/fs_context.h>
#include <linux/shmem_fs.h>
+#include <linux/mnt_idmapping.h>
#include "pnode.h"
#include "internal.h"
@@ -561,7 +562,7 @@ static void free_vfsmnt(struct mount *mnt)
struct user_namespace *mnt_userns;
mnt_userns = mnt_user_ns(&mnt->mnt);
- if (mnt_userns != &init_user_ns)
+ if (!initial_idmapping(mnt_userns))
put_user_ns(mnt_userns);
kfree_const(mnt->mnt_devname);
#ifdef CONFIG_SMP
@@ -965,6 +966,7 @@ static struct mount *skip_mnt_tree(struct mount *p)
struct vfsmount *vfs_create_mount(struct fs_context *fc)
{
struct mount *mnt;
+ struct user_namespace *fs_userns;
if (!fc->root)
return ERR_PTR(-EINVAL);
@@ -982,6 +984,10 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc)
mnt->mnt_mountpoint = mnt->mnt.mnt_root;
mnt->mnt_parent = mnt;
+ fs_userns = mnt->mnt.mnt_sb->s_user_ns;
+ if (!initial_idmapping(fs_userns))
+ mnt->mnt.mnt_userns = get_user_ns(fs_userns);
+
lock_mount_hash();
list_add_tail(&mnt->mnt_instance, &mnt->mnt.mnt_sb->s_mounts);
unlock_mount_hash();
@@ -1072,7 +1078,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
atomic_inc(&sb->s_active);
mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
- if (mnt->mnt.mnt_userns != &init_user_ns)
+ if (!initial_idmapping(mnt->mnt.mnt_userns))
mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
mnt->mnt.mnt_sb = sb;
mnt->mnt.mnt_root = dget(root);
@@ -3927,28 +3933,32 @@ static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
{
struct vfsmount *m = &mnt->mnt;
+ struct user_namespace *fs_userns = m->mnt_sb->s_user_ns;
if (!kattr->mnt_userns)
return 0;
/*
+ * Creating an idmapped mount with the filesystem-wide idmapping
+ * doesn't make sense so block that. We don't allow mushy semantics.
+ */
+ if (kattr->mnt_userns == fs_userns)
+ return -EINVAL;
+
+ /*
* Once a mount has been idmapped we don't allow it to change its
* mapping. It makes things simpler and callers can just create
* another bind-mount they can idmap if they want to.
*/
- if (mnt_user_ns(m) != &init_user_ns)
+ if (is_idmapped_mnt(m))
return -EPERM;
/* The underlying filesystem doesn't support idmapped mounts yet. */
if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
return -EINVAL;
- /* Don't yet support filesystem mountable in user namespaces. */
- if (m->mnt_sb->s_user_ns != &init_user_ns)
- return -EINVAL;
-
/* We're not controlling the superblock. */
- if (!capable(CAP_SYS_ADMIN))
+ if (!ns_capable(fs_userns, CAP_SYS_ADMIN))
return -EPERM;
/* Mount has already been visible in the filesystem hierarchy. */
@@ -4002,14 +4012,27 @@ out:
static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
{
- struct user_namespace *mnt_userns;
+ struct user_namespace *mnt_userns, *old_mnt_userns;
if (!kattr->mnt_userns)
return;
+ /*
+ * We're the only ones able to change the mount's idmapping. So
+ * mnt->mnt.mnt_userns is stable and we can retrieve it directly.
+ */
+ old_mnt_userns = mnt->mnt.mnt_userns;
+
mnt_userns = get_user_ns(kattr->mnt_userns);
/* Pairs with smp_load_acquire() in mnt_user_ns(). */
smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
+
+ /*
+ * If this is an idmapped filesystem drop the reference we've taken
+ * in vfs_create_mount() before.
+ */
+ if (!initial_idmapping(old_mnt_userns))
+ put_user_ns(old_mnt_userns);
}
static void mount_setattr_commit(struct mount_kattr *kattr,
@@ -4133,13 +4156,15 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
}
/*
- * The init_user_ns is used to indicate that a vfsmount is not idmapped.
- * This is simpler than just having to treat NULL as unmapped. Users
- * wanting to idmap a mount to init_user_ns can just use a namespace
- * with an identity mapping.
+ * The initial idmapping cannot be used to create an idmapped
+ * mount. We use the initial idmapping as an indicator of a mount
+ * that is not idmapped. It can simply be passed into helpers that
+ * are aware of idmapped mounts as a convenient shortcut. A user
+ * can just create a dedicated identity mapping to achieve the same
+ * result.
*/
mnt_userns = container_of(ns, struct user_namespace, ns);
- if (mnt_userns == &init_user_ns) {
+ if (initial_idmapping(mnt_userns)) {
err = -EPERM;
goto out_fput;
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 9421dae22737..668c7527b17e 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -427,7 +427,7 @@ static int check_export(struct path *path, int *flags, unsigned char *uuid)
return -EINVAL;
}
- if (mnt_user_ns(path->mnt) != &init_user_ns) {
+ if (is_idmapped_mnt(path->mnt)) {
dprintk("exp_export: export of idmapped mounts not yet supported.\n");
return -EINVAL;
}
diff --git a/fs/open.c b/fs/open.c
index f732fb94600c..9ff2f621b760 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -32,6 +32,7 @@
#include <linux/ima.h>
#include <linux/dnotify.h>
#include <linux/compat.h>
+#include <linux/mnt_idmapping.h>
#include "internal.h"
@@ -640,7 +641,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
int chown_common(const struct path *path, uid_t user, gid_t group)
{
- struct user_namespace *mnt_userns;
+ struct user_namespace *mnt_userns, *fs_userns;
struct inode *inode = path->dentry->d_inode;
struct inode *delegated_inode = NULL;
int error;
@@ -652,8 +653,9 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
gid = make_kgid(current_user_ns(), group);
mnt_userns = mnt_user_ns(path->mnt);
- uid = kuid_from_mnt(mnt_userns, uid);
- gid = kgid_from_mnt(mnt_userns, gid);
+ fs_userns = i_user_ns(inode);
+ uid = mapped_kuid_user(mnt_userns, fs_userns, uid);
+ gid = mapped_kgid_user(mnt_userns, fs_userns, gid);
retry_deleg:
newattrs.ia_valid = ATTR_CTIME;
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 265181c110ae..7bb0a47cb615 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -873,7 +873,7 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
pr_err("filesystem on '%s' not supported\n", name);
goto out_put;
}
- if (mnt_user_ns(path->mnt) != &init_user_ns) {
+ if (is_idmapped_mnt(path->mnt)) {
pr_err("idmapped layers are currently not supported\n");
goto out_put;
}
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 9323a854a60a..80acb6885cf9 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -23,6 +23,7 @@
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/namei.h>
+#include <linux/mnt_idmapping.h>
static struct posix_acl **acl_by_type(struct inode *inode, int type)
{
@@ -374,7 +375,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
goto check_perm;
break;
case ACL_USER:
- uid = kuid_into_mnt(mnt_userns, pa->e_uid);
+ uid = mapped_kuid_fs(mnt_userns,
+ i_user_ns(inode),
+ pa->e_uid);
if (uid_eq(uid, current_fsuid()))
goto mask;
break;
@@ -387,7 +390,9 @@ posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
}
break;
case ACL_GROUP:
- gid = kgid_into_mnt(mnt_userns, pa->e_gid);
+ gid = mapped_kgid_fs(mnt_userns,
+ i_user_ns(inode),
+ pa->e_gid);
if (in_group_p(gid)) {
found = 1;
if ((pa->e_perm & want) == want)
@@ -734,17 +739,17 @@ static void posix_acl_fix_xattr_userns(
case ACL_USER:
uid = make_kuid(from, le32_to_cpu(entry->e_id));
if (from_user)
- uid = kuid_from_mnt(mnt_userns, uid);
+ uid = mapped_kuid_user(mnt_userns, &init_user_ns, uid);
else
- uid = kuid_into_mnt(mnt_userns, uid);
+ uid = mapped_kuid_fs(mnt_userns, &init_user_ns, uid);
entry->e_id = cpu_to_le32(from_kuid(to, uid));
break;
case ACL_GROUP:
gid = make_kgid(from, le32_to_cpu(entry->e_id));
if (from_user)
- gid = kgid_from_mnt(mnt_userns, gid);
+ gid = mapped_kgid_user(mnt_userns, &init_user_ns, gid);
else
- gid = kgid_into_mnt(mnt_userns, gid);
+ gid = mapped_kgid_fs(mnt_userns, &init_user_ns, gid);
entry->e_id = cpu_to_le32(from_kgid(to, gid));
break;
default:
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 392ef5162655..49650e54d2f8 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -80,7 +80,7 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
seq_puts(m, fs_infop->str);
}
- if (mnt_user_ns(mnt) != &init_user_ns)
+ if (is_idmapped_mnt(mnt))
seq_puts(m, ",idmapped");
}
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 6771f357ad2c..04bf467b1090 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -988,8 +988,8 @@ xfs_create(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
- mapped_fsgid(mnt_userns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
+ mapped_fsgid(mnt_userns, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
@@ -1142,8 +1142,8 @@ xfs_create_tmpfile(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
- mapped_fsgid(mnt_userns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
+ mapped_fsgid(mnt_userns, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index c174262a074e..09a8fba84ff9 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -61,6 +61,7 @@ typedef __u32 xfs_nlink_t;
#include <linux/ratelimit.h>
#include <linux/rhashtable.h>
#include <linux/xattr.h>
+#include <linux/mnt_idmapping.h>
#include <asm/page.h>
#include <asm/div64.h>
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index fc2c6a404647..a31d2e5d0321 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -184,8 +184,8 @@ xfs_symlink(
/*
* Make sure that we have allocated dquot(s) on disk.
*/
- error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns),
- mapped_fsgid(mnt_userns), prid,
+ error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
+ mapped_fsgid(mnt_userns, &init_user_ns), prid,
XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
&udqp, &gdqp, &pdqp);
if (error)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index bbf812ce89a8..493b87e3616b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -41,6 +41,7 @@
#include <linux/stddef.h>
#include <linux/mount.h>
#include <linux/cred.h>
+#include <linux/mnt_idmapping.h>
#include <asm/byteorder.h>
#include <uapi/linux/fs.h>
@@ -1599,6 +1600,11 @@ struct super_block {
struct list_head s_inodes_wb; /* writeback inodes */
} __randomize_layout;
+static inline struct user_namespace *i_user_ns(const struct inode *inode)
+{
+ return inode->i_sb->s_user_ns;
+}
+
/* Helper functions so that in most cases filesystems will
* not need to deal directly with kuid_t and kgid_t and can
* instead deal with the raw numeric values that are stored
@@ -1606,50 +1612,22 @@ struct super_block {
*/
static inline uid_t i_uid_read(const struct inode *inode)
{
- return from_kuid(inode->i_sb->s_user_ns, inode->i_uid);
+ return from_kuid(i_user_ns(inode), inode->i_uid);
}
static inline gid_t i_gid_read(const struct inode *inode)
{
- return from_kgid(inode->i_sb->s_user_ns, inode->i_gid);
+ return from_kgid(i_user_ns(inode), inode->i_gid);
}
static inline void i_uid_write(struct inode *inode, uid_t uid)
{
- inode->i_uid = make_kuid(inode->i_sb->s_user_ns, uid);
+ inode->i_uid = make_kuid(i_user_ns(inode), uid);
}
static inline void i_gid_write(struct inode *inode, gid_t gid)
{
- inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
-}
-
-/**
- * kuid_into_mnt - map a kuid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
- kuid_t kuid)
-{
- return make_kuid(mnt_userns, __kuid_val(kuid));
-}
-
-/**
- * kgid_into_mnt - map a kgid down into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
- kgid_t kgid)
-{
- return make_kgid(mnt_userns, __kgid_val(kgid));
+ inode->i_gid = make_kgid(i_user_ns(inode), gid);
}
/**
@@ -1663,7 +1641,7 @@ static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return kuid_into_mnt(mnt_userns, inode->i_uid);
+ return mapped_kuid_fs(mnt_userns, i_user_ns(inode), inode->i_uid);
}
/**
@@ -1677,69 +1655,7 @@ static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
const struct inode *inode)
{
- return kgid_into_mnt(mnt_userns, inode->i_gid);
-}
-
-/**
- * kuid_from_mnt - map a kuid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kuid: kuid to be mapped
- *
- * Return: @kuid mapped up according to @mnt_userns.
- * If @kuid has no mapping INVALID_UID is returned.
- */
-static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
- kuid_t kuid)
-{
- return KUIDT_INIT(from_kuid(mnt_userns, kuid));
-}
-
-/**
- * kgid_from_mnt - map a kgid up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- * @kgid: kgid to be mapped
- *
- * Return: @kgid mapped up according to @mnt_userns.
- * If @kgid has no mapping INVALID_GID is returned.
- */
-static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
- kgid_t kgid)
-{
- return KGIDT_INIT(from_kgid(mnt_userns, kgid));
-}
-
-/**
- * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- *
- * Use this helper to initialize a new vfs or filesystem object based on
- * the caller's fsuid. A common example is initializing the i_uid field of
- * a newly allocated inode triggered by a creation event such as mkdir or
- * O_CREAT. Other examples include the allocation of quotas for a specific
- * user.
- *
- * Return: the caller's current fsuid mapped up according to @mnt_userns.
- */
-static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns)
-{
- return kuid_from_mnt(mnt_userns, current_fsuid());
-}
-
-/**
- * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
- * @mnt_userns: user namespace of the relevant mount
- *
- * Use this helper to initialize a new vfs or filesystem object based on
- * the caller's fsgid. A common example is initializing the i_gid field of
- * a newly allocated inode triggered by a creation event such as mkdir or
- * O_CREAT. Other examples include the allocation of quotas for a specific
- * user.
- *
- * Return: the caller's current fsgid mapped up according to @mnt_userns.
- */
-static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
-{
- return kgid_from_mnt(mnt_userns, current_fsgid());
+ return mapped_kgid_fs(mnt_userns, i_user_ns(inode), inode->i_gid);
}
/**
@@ -1753,7 +1669,7 @@ static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns)
static inline void inode_fsuid_set(struct inode *inode,
struct user_namespace *mnt_userns)
{
- inode->i_uid = mapped_fsuid(mnt_userns);
+ inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
}
/**
@@ -1767,7 +1683,7 @@ static inline void inode_fsuid_set(struct inode *inode,
static inline void inode_fsgid_set(struct inode *inode,
struct user_namespace *mnt_userns)
{
- inode->i_gid = mapped_fsgid(mnt_userns);
+ inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
}
/**
@@ -1784,10 +1700,18 @@ static inline void inode_fsgid_set(struct inode *inode,
static inline bool fsuidgid_has_mapping(struct super_block *sb,
struct user_namespace *mnt_userns)
{
- struct user_namespace *s_user_ns = sb->s_user_ns;
+ struct user_namespace *fs_userns = sb->s_user_ns;
+ kuid_t kuid;
+ kgid_t kgid;
- return kuid_has_mapping(s_user_ns, mapped_fsuid(mnt_userns)) &&
- kgid_has_mapping(s_user_ns, mapped_fsgid(mnt_userns));
+ kuid = mapped_fsuid(mnt_userns, fs_userns);
+ if (!uid_valid(kuid))
+ return false;
+ kgid = mapped_fsgid(mnt_userns, fs_userns);
+ if (!gid_valid(kgid))
+ return false;
+ return kuid_has_mapping(fs_userns, kuid) &&
+ kgid_has_mapping(fs_userns, kgid);
}
extern struct timespec64 current_time(struct inode *inode);
@@ -2724,6 +2648,21 @@ static inline struct user_namespace *file_mnt_user_ns(struct file *file)
{
return mnt_user_ns(file->f_path.mnt);
}
+
+/**
+ * is_idmapped_mnt - check whether a mount is idmapped
+ * @mnt: the mount to check
+ *
+ * If @mnt has an idmapping attached different from the
+ * filesystem's idmapping then @mnt is idmapped.
+ *
+ * Return: true if mount is idmapped, false if not.
+ */
+static inline bool is_idmapped_mnt(const struct vfsmount *mnt)
+{
+ return mnt_user_ns(mnt) != mnt->mnt_sb->s_user_ns;
+}
+
extern long vfs_truncate(const struct path *, loff_t);
int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
unsigned int time_attrs, struct file *filp);
diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
new file mode 100644
index 000000000000..ee5a217de2a8
--- /dev/null
+++ b/include/linux/mnt_idmapping.h
@@ -0,0 +1,234 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MNT_IDMAPPING_H
+#define _LINUX_MNT_IDMAPPING_H
+
+#include <linux/types.h>
+#include <linux/uidgid.h>
+
+struct user_namespace;
+/*
+ * Carries the initial idmapping of 0:0:4294967295 which is an identity
+ * mapping. This means that {g,u}id 0 is mapped to {g,u}id 0, {g,u}id 1 is
+ * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
+ */
+extern struct user_namespace init_user_ns;
+
+/**
+ * initial_idmapping - check whether this is the initial mapping
+ * @ns: idmapping to check
+ *
+ * Check whether this is the initial mapping, mapping 0 to 0, 1 to 1,
+ * [...], 1000 to 1000 [...].
+ *
+ * Return: true if this is the initial mapping, false if not.
+ */
+static inline bool initial_idmapping(const struct user_namespace *ns)
+{
+ return ns == &init_user_ns;
+}
+
+/**
+ * no_idmapping - check whether we can skip remapping a kuid/gid
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ *
+ * This function can be used to check whether a remapping between two
+ * idmappings is required.
+ * An idmapped mount is a mount that has an idmapping attached to it that
+ * is different from the filesystem's idmapping and the initial idmapping.
+ * If the initial mapping is used or the idmapping of the mount and the
+ * filesystem are identical no remapping is required.
+ *
+ * Return: true if remapping can be skipped, false if not.
+ */
+static inline bool no_idmapping(const struct user_namespace *mnt_userns,
+ const struct user_namespace *fs_userns)
+{
+ return initial_idmapping(mnt_userns) || mnt_userns == fs_userns;
+}
+
+/**
+ * mapped_kuid_fs - map a filesystem kuid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kuid: kuid to be mapped
+ *
+ * Take a @kuid and remap it from @fs_userns into @mnt_userns. Use this
+ * function when preparing a @kuid to be reported to userspace.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kuid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kuid won't change when calling
+ * from_kuid() so we can simply retrieve the value via __kuid_val()
+ * directly.
+ *
+ * Return: @kuid mapped according to @mnt_userns.
+ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
+ * returned.
+ */
+static inline kuid_t mapped_kuid_fs(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kuid_t kuid)
+{
+ uid_t uid;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return kuid;
+ if (initial_idmapping(fs_userns))
+ uid = __kuid_val(kuid);
+ else
+ uid = from_kuid(fs_userns, kuid);
+ if (uid == (uid_t)-1)
+ return INVALID_UID;
+ return make_kuid(mnt_userns, uid);
+}
+
+/**
+ * mapped_kgid_fs - map a filesystem kgid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kgid: kgid to be mapped
+ *
+ * Take a @kgid and remap it from @fs_userns into @mnt_userns. Use this
+ * function when preparing a @kgid to be reported to userspace.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kgid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kgid won't change when calling
+ * from_kgid() so we can simply retrieve the value via __kgid_val()
+ * directly.
+ *
+ * Return: @kgid mapped according to @mnt_userns.
+ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
+ * returned.
+ */
+static inline kgid_t mapped_kgid_fs(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kgid_t kgid)
+{
+ gid_t gid;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return kgid;
+ if (initial_idmapping(fs_userns))
+ gid = __kgid_val(kgid);
+ else
+ gid = from_kgid(fs_userns, kgid);
+ if (gid == (gid_t)-1)
+ return INVALID_GID;
+ return make_kgid(mnt_userns, gid);
+}
+
+/**
+ * mapped_kuid_user - map a user kuid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kuid: kuid to be mapped
+ *
+ * Use the idmapping of @mnt_userns to remap a @kuid into @fs_userns. Use this
+ * function when preparing a @kuid to be written to disk or inode.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kuid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kuid won't change when calling
+ * make_kuid() so we can simply retrieve the value via KUIDT_INIT()
+ * directly.
+ *
+ * Return: @kuid mapped according to @mnt_userns.
+ * If @kuid has no mapping in either @mnt_userns or @fs_userns INVALID_UID is
+ * returned.
+ */
+static inline kuid_t mapped_kuid_user(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kuid_t kuid)
+{
+ uid_t uid;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return kuid;
+ uid = from_kuid(mnt_userns, kuid);
+ if (uid == (uid_t)-1)
+ return INVALID_UID;
+ if (initial_idmapping(fs_userns))
+ return KUIDT_INIT(uid);
+ return make_kuid(fs_userns, uid);
+}
+
+/**
+ * mapped_kgid_user - map a user kgid into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ * @kgid: kgid to be mapped
+ *
+ * Use the idmapping of @mnt_userns to remap a @kgid into @fs_userns. Use this
+ * function when preparing a @kgid to be written to disk or inode.
+ *
+ * If no_idmapping() determines that this is not an idmapped mount we can
+ * simply return @kgid unchanged.
+ * If initial_idmapping() tells us that the filesystem is not mounted with an
+ * idmapping we know the value of @kgid won't change when calling
+ * make_kgid() so we can simply retrieve the value via KGIDT_INIT()
+ * directly.
+ *
+ * Return: @kgid mapped according to @mnt_userns.
+ * If @kgid has no mapping in either @mnt_userns or @fs_userns INVALID_GID is
+ * returned.
+ */
+static inline kgid_t mapped_kgid_user(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns,
+ kgid_t kgid)
+{
+ gid_t gid;
+
+ if (no_idmapping(mnt_userns, fs_userns))
+ return kgid;
+ gid = from_kgid(mnt_userns, kgid);
+ if (gid == (gid_t)-1)
+ return INVALID_GID;
+ if (initial_idmapping(fs_userns))
+ return KGIDT_INIT(gid);
+ return make_kgid(fs_userns, gid);
+}
+
+/**
+ * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ *
+ * Use this helper to initialize a new vfs or filesystem object based on
+ * the caller's fsuid. A common example is initializing the i_uid field of
+ * a newly allocated inode triggered by a creation event such as mkdir or
+ * O_CREAT. Other examples include the allocation of quotas for a specific
+ * user.
+ *
+ * Return: the caller's current fsuid mapped up according to @mnt_userns.
+ */
+static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns)
+{
+ return mapped_kuid_user(mnt_userns, fs_userns, current_fsuid());
+}
+
+/**
+ * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
+ * @mnt_userns: the mount's idmapping
+ * @fs_userns: the filesystem's idmapping
+ *
+ * Use this helper to initialize a new vfs or filesystem object based on
+ * the caller's fsgid. A common example is initializing the i_gid field of
+ * a newly allocated inode triggered by a creation event such as mkdir or
+ * O_CREAT. Other examples include the allocation of quotas for a specific
+ * user.
+ *
+ * Return: the caller's current fsgid mapped up according to @mnt_userns.
+ */
+static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns)
+{
+ return mapped_kgid_user(mnt_userns, fs_userns, current_fsgid());
+}
+
+#endif /* _LINUX_MNT_IDMAPPING_H */
diff --git a/security/commoncap.c b/security/commoncap.c
index 3f810d37b71b..5fc8986c3c77 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -24,6 +24,7 @@
#include <linux/user_namespace.h>
#include <linux/binfmts.h>
#include <linux/personality.h>
+#include <linux/mnt_idmapping.h>
/*
* If a non-root user executes a setuid-root binary in
@@ -418,7 +419,7 @@ int cap_inode_getsecurity(struct user_namespace *mnt_userns,
kroot = make_kuid(fs_ns, root);
/* If this is an idmapped mount shift the kuid. */
- kroot = kuid_into_mnt(mnt_userns, kroot);
+ kroot = mapped_kuid_fs(mnt_userns, fs_ns, kroot);
/* If the root kuid maps to a valid uid in current ns, then return
* this as a nscap. */
@@ -488,6 +489,7 @@ out_free:
* @size: size of @ivalue
* @task_ns: user namespace of the caller
* @mnt_userns: user namespace of the mount the inode was found from
+ * @fs_userns: user namespace of the filesystem
*
* If the inode has been found through an idmapped mount the user namespace of
* the vfsmount must be passed through @mnt_userns. This function will then
@@ -497,7 +499,8 @@ out_free:
*/
static kuid_t rootid_from_xattr(const void *value, size_t size,
struct user_namespace *task_ns,
- struct user_namespace *mnt_userns)
+ struct user_namespace *mnt_userns,
+ struct user_namespace *fs_userns)
{
const struct vfs_ns_cap_data *nscap = value;
kuid_t rootkid;
@@ -507,7 +510,7 @@ static kuid_t rootid_from_xattr(const void *value, size_t size,
rootid = le32_to_cpu(nscap->rootid);
rootkid = make_kuid(task_ns, rootid);
- return kuid_from_mnt(mnt_userns, rootkid);
+ return mapped_kuid_user(mnt_userns, fs_userns, rootkid);
}
static bool validheader(size_t size, const struct vfs_cap_data *cap)
@@ -553,12 +556,12 @@ int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
return -EINVAL;
if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
return -EPERM;
- if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns))
+ if (size == XATTR_CAPS_SZ_2 && (mnt_userns == fs_ns))
if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
/* user is privileged, just write the v2 */
return size;
- rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns);
+ rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns, fs_ns);
if (!uid_valid(rootid))
return -EINVAL;
@@ -699,7 +702,7 @@ int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
/* Limit the caps to the mounter of the filesystem
* or the more limited uid specified in the xattr.
*/
- rootkuid = kuid_into_mnt(mnt_userns, rootkuid);
+ rootkuid = mapped_kuid_fs(mnt_userns, fs_ns, rootkuid);
if (!rootid_owns_currentns(rootkuid))
return -ENODATA;