mirror of
git://git.yoctoproject.org/linux-yocto.git
synced 2025-10-22 23:13:01 +02:00

Anonymous inodes may never ever be exectuable and the only way to enforce this is to raise SB_I_NOEXEC on the superblock which can never be unset. I've made the exec code yell at anyone who does not abide by this rule. For good measure also kill any pretense that device nodes are supported on the secretmem filesystem. > WARNING: fs/exec.c:119 at path_noexec+0x1af/0x200 fs/exec.c:118, CPU#1: syz-executor260/5835 > Modules linked in: > CPU: 1 UID: 0 PID: 5835 Comm: syz-executor260 Not tainted 6.16.0-rc4-next-20250703-syzkaller #0 PREEMPT(full) > Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 > RIP: 0010:path_noexec+0x1af/0x200 fs/exec.c:118 > Code: 02 31 ff 48 89 de e8 f0 b1 89 ff d1 eb eb 07 e8 07 ad 89 ff b3 01 89 d8 5b 41 5e 41 5f 5d c3 cc cc cc cc cc e8 f2 ac 89 ff 90 <0f> 0b 90 e9 48 ff ff ff 44 89 f1 80 e1 07 80 c1 03 38 c1 0f 8c a6 > RSP: 0018:ffffc90003eefbd8 EFLAGS: 00010293 > RAX: ffffffff8235f22e RBX: ffff888072be0940 RCX: ffff88807763bc00 > RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 > RBP: 0000000000080000 R08: ffff88807763bc00 R09: 0000000000000003 > R10: 0000000000000003 R11: 0000000000000000 R12: 0000000000000011 > R13: 1ffff920007ddf90 R14: 0000000000000000 R15: dffffc0000000000 > FS: 000055556832d380(0000) GS:ffff888125d1e000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > CR2: 00007f21e34810d0 CR3: 00000000718a8000 CR4: 00000000003526f0 > Call Trace: > <TASK> > do_mmap+0xa43/0x10d0 mm/mmap.c:472 > vm_mmap_pgoff+0x31b/0x4c0 mm/util.c:579 > ksys_mmap_pgoff+0x51f/0x760 mm/mmap.c:607 > do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline] > do_syscall_64+0xfa/0x3b0 arch/x86/entry/syscall_64.c:94 > entry_SYSCALL_64_after_hwframe+0x77/0x7f > RIP: 0033:0x7f21e340a9f9 > Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 c1 17 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 > RSP: 002b:00007ffd23ca3468 EFLAGS: 00000246 ORIG_RAX: 0000000000000009 > RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f21e340a9f9 > RDX: 0000000000000000 RSI: 0000000000004000 RDI: 0000200000ff9000 > RBP: 00007f21e347d5f0 R08: 0000000000000003 R09: 0000000000000000 > R10: 0000000000000011 R11: 0000000000000246 R12: 0000000000000001 > R13: 431bde82d7b634db R14: 0000000000000001 R15: 0000000000000001 Link: https://lore.kernel.org/686ba948.a00a0220.c7b3.0080.GAE@google.com Signed-off-by: Christian Brauner <brauner@kernel.org>
293 lines
6.3 KiB
C
293 lines
6.3 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
/*
|
|
* Copyright IBM Corporation, 2021
|
|
*
|
|
* Author: Mike Rapoport <rppt@linux.ibm.com>
|
|
*/
|
|
|
|
#include <linux/mm.h>
|
|
#include <linux/fs.h>
|
|
#include <linux/swap.h>
|
|
#include <linux/mount.h>
|
|
#include <linux/memfd.h>
|
|
#include <linux/bitops.h>
|
|
#include <linux/printk.h>
|
|
#include <linux/pagemap.h>
|
|
#include <linux/syscalls.h>
|
|
#include <linux/pseudo_fs.h>
|
|
#include <linux/secretmem.h>
|
|
#include <linux/set_memory.h>
|
|
#include <linux/sched/signal.h>
|
|
|
|
#include <uapi/linux/magic.h>
|
|
|
|
#include <asm/tlbflush.h>
|
|
|
|
#include "internal.h"
|
|
|
|
#undef pr_fmt
|
|
#define pr_fmt(fmt) "secretmem: " fmt
|
|
|
|
/*
|
|
* Define mode and flag masks to allow validation of the system call
|
|
* parameters.
|
|
*/
|
|
#define SECRETMEM_MODE_MASK (0x0)
|
|
#define SECRETMEM_FLAGS_MASK SECRETMEM_MODE_MASK
|
|
|
|
static bool secretmem_enable __ro_after_init = 1;
|
|
module_param_named(enable, secretmem_enable, bool, 0400);
|
|
MODULE_PARM_DESC(secretmem_enable,
|
|
"Enable secretmem and memfd_secret(2) system call");
|
|
|
|
static atomic_t secretmem_users;
|
|
|
|
bool secretmem_active(void)
|
|
{
|
|
return !!atomic_read(&secretmem_users);
|
|
}
|
|
|
|
static vm_fault_t secretmem_fault(struct vm_fault *vmf)
|
|
{
|
|
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
|
|
struct inode *inode = file_inode(vmf->vma->vm_file);
|
|
pgoff_t offset = vmf->pgoff;
|
|
gfp_t gfp = vmf->gfp_mask;
|
|
unsigned long addr;
|
|
struct page *page;
|
|
struct folio *folio;
|
|
vm_fault_t ret;
|
|
int err;
|
|
|
|
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
|
|
return vmf_error(-EINVAL);
|
|
|
|
filemap_invalidate_lock_shared(mapping);
|
|
|
|
retry:
|
|
page = find_lock_page(mapping, offset);
|
|
if (!page) {
|
|
folio = folio_alloc(gfp | __GFP_ZERO, 0);
|
|
if (!folio) {
|
|
ret = VM_FAULT_OOM;
|
|
goto out;
|
|
}
|
|
|
|
page = &folio->page;
|
|
err = set_direct_map_invalid_noflush(page);
|
|
if (err) {
|
|
folio_put(folio);
|
|
ret = vmf_error(err);
|
|
goto out;
|
|
}
|
|
|
|
__folio_mark_uptodate(folio);
|
|
err = filemap_add_folio(mapping, folio, offset, gfp);
|
|
if (unlikely(err)) {
|
|
folio_put(folio);
|
|
/*
|
|
* If a split of large page was required, it
|
|
* already happened when we marked the page invalid
|
|
* which guarantees that this call won't fail
|
|
*/
|
|
set_direct_map_default_noflush(page);
|
|
if (err == -EEXIST)
|
|
goto retry;
|
|
|
|
ret = vmf_error(err);
|
|
goto out;
|
|
}
|
|
|
|
addr = (unsigned long)page_address(page);
|
|
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
|
|
}
|
|
|
|
vmf->page = page;
|
|
ret = VM_FAULT_LOCKED;
|
|
|
|
out:
|
|
filemap_invalidate_unlock_shared(mapping);
|
|
return ret;
|
|
}
|
|
|
|
static const struct vm_operations_struct secretmem_vm_ops = {
|
|
.fault = secretmem_fault,
|
|
};
|
|
|
|
static int secretmem_release(struct inode *inode, struct file *file)
|
|
{
|
|
atomic_dec(&secretmem_users);
|
|
return 0;
|
|
}
|
|
|
|
static int secretmem_mmap_prepare(struct vm_area_desc *desc)
|
|
{
|
|
const unsigned long len = desc->end - desc->start;
|
|
|
|
if ((desc->vm_flags & (VM_SHARED | VM_MAYSHARE)) == 0)
|
|
return -EINVAL;
|
|
|
|
if (!mlock_future_ok(desc->mm, desc->vm_flags | VM_LOCKED, len))
|
|
return -EAGAIN;
|
|
|
|
desc->vm_flags |= VM_LOCKED | VM_DONTDUMP;
|
|
desc->vm_ops = &secretmem_vm_ops;
|
|
|
|
return 0;
|
|
}
|
|
|
|
bool vma_is_secretmem(struct vm_area_struct *vma)
|
|
{
|
|
return vma->vm_ops == &secretmem_vm_ops;
|
|
}
|
|
|
|
static const struct file_operations secretmem_fops = {
|
|
.release = secretmem_release,
|
|
.mmap_prepare = secretmem_mmap_prepare,
|
|
};
|
|
|
|
static int secretmem_migrate_folio(struct address_space *mapping,
|
|
struct folio *dst, struct folio *src, enum migrate_mode mode)
|
|
{
|
|
return -EBUSY;
|
|
}
|
|
|
|
static void secretmem_free_folio(struct folio *folio)
|
|
{
|
|
set_direct_map_default_noflush(&folio->page);
|
|
folio_zero_segment(folio, 0, folio_size(folio));
|
|
}
|
|
|
|
const struct address_space_operations secretmem_aops = {
|
|
.dirty_folio = noop_dirty_folio,
|
|
.free_folio = secretmem_free_folio,
|
|
.migrate_folio = secretmem_migrate_folio,
|
|
};
|
|
|
|
static int secretmem_setattr(struct mnt_idmap *idmap,
|
|
struct dentry *dentry, struct iattr *iattr)
|
|
{
|
|
struct inode *inode = d_inode(dentry);
|
|
struct address_space *mapping = inode->i_mapping;
|
|
unsigned int ia_valid = iattr->ia_valid;
|
|
int ret;
|
|
|
|
filemap_invalidate_lock(mapping);
|
|
|
|
if ((ia_valid & ATTR_SIZE) && inode->i_size)
|
|
ret = -EINVAL;
|
|
else
|
|
ret = simple_setattr(idmap, dentry, iattr);
|
|
|
|
filemap_invalidate_unlock(mapping);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static const struct inode_operations secretmem_iops = {
|
|
.setattr = secretmem_setattr,
|
|
};
|
|
|
|
static struct vfsmount *secretmem_mnt;
|
|
|
|
static struct file *secretmem_file_create(unsigned long flags)
|
|
{
|
|
struct file *file;
|
|
struct inode *inode;
|
|
const char *anon_name = "[secretmem]";
|
|
|
|
inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL);
|
|
if (IS_ERR(inode))
|
|
return ERR_CAST(inode);
|
|
|
|
file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
|
|
O_RDWR, &secretmem_fops);
|
|
if (IS_ERR(file))
|
|
goto err_free_inode;
|
|
|
|
mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
|
|
mapping_set_unevictable(inode->i_mapping);
|
|
|
|
inode->i_op = &secretmem_iops;
|
|
inode->i_mapping->a_ops = &secretmem_aops;
|
|
|
|
/* pretend we are a normal file with zero size */
|
|
inode->i_mode |= S_IFREG;
|
|
inode->i_size = 0;
|
|
|
|
return file;
|
|
|
|
err_free_inode:
|
|
iput(inode);
|
|
return file;
|
|
}
|
|
|
|
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
|
|
{
|
|
struct file *file;
|
|
int fd, err;
|
|
|
|
/* make sure local flags do not confict with global fcntl.h */
|
|
BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);
|
|
|
|
if (!secretmem_enable || !can_set_direct_map())
|
|
return -ENOSYS;
|
|
|
|
if (flags & ~(SECRETMEM_FLAGS_MASK | O_CLOEXEC))
|
|
return -EINVAL;
|
|
if (atomic_read(&secretmem_users) < 0)
|
|
return -ENFILE;
|
|
|
|
fd = get_unused_fd_flags(flags & O_CLOEXEC);
|
|
if (fd < 0)
|
|
return fd;
|
|
|
|
file = secretmem_file_create(flags);
|
|
if (IS_ERR(file)) {
|
|
err = PTR_ERR(file);
|
|
goto err_put_fd;
|
|
}
|
|
|
|
file->f_flags |= O_LARGEFILE;
|
|
|
|
atomic_inc(&secretmem_users);
|
|
fd_install(fd, file);
|
|
return fd;
|
|
|
|
err_put_fd:
|
|
put_unused_fd(fd);
|
|
return err;
|
|
}
|
|
|
|
static int secretmem_init_fs_context(struct fs_context *fc)
|
|
{
|
|
struct pseudo_fs_context *ctx;
|
|
|
|
ctx = init_pseudo(fc, SECRETMEM_MAGIC);
|
|
if (!ctx)
|
|
return -ENOMEM;
|
|
|
|
fc->s_iflags |= SB_I_NOEXEC;
|
|
fc->s_iflags |= SB_I_NODEV;
|
|
return 0;
|
|
}
|
|
|
|
static struct file_system_type secretmem_fs = {
|
|
.name = "secretmem",
|
|
.init_fs_context = secretmem_init_fs_context,
|
|
.kill_sb = kill_anon_super,
|
|
};
|
|
|
|
static int __init secretmem_init(void)
|
|
{
|
|
if (!secretmem_enable || !can_set_direct_map())
|
|
return 0;
|
|
|
|
secretmem_mnt = kern_mount(&secretmem_fs);
|
|
if (IS_ERR(secretmem_mnt))
|
|
return PTR_ERR(secretmem_mnt);
|
|
|
|
return 0;
|
|
}
|
|
fs_initcall(secretmem_init);
|