//
// Syd: rock-solid application kernel
// src/kernel/memfd.rs: memfd_create(2) handler
//
// Copyright (c) 2023, 2024, 2025 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

use std::ffi::CStr;

use libseccomp::ScmpNotifResp;
use nix::errno::Errno;

use crate::{
    cookie::safe_memfd_create,
    fs::{MFD_CLOEXEC, MFD_EXEC, MFD_NOEXEC_SEAL},
    hook::{RemoteProcess, UNotifyEventRequest},
    kernel::sandbox_path,
    path::XPathBuf,
    sandbox::Capability,
};

/// Handler for the `memfd_create(2)` system call.
///
/// Reads the requested name from the memory of the calling process,
/// optionally forces the memory file to be sealed as non-executable,
/// checks the name against the create sandbox as `/memfd:<name>`, and
/// finally emulates the call and sends the new fd to the process.
///
/// The handler closure fails with:
///
/// - `EFAULT` if the name pointer is NULL.
/// - `EINVAL` if the name is longer than the `memfd_create(2)` limit,
///   or starts with the reserved prefix `syd`.
/// - `EACCES` if the create sandbox denies access to the name.
/// - `ENOSYS` if creation fails with `EINVAL` while `MFD_EXEC` or
///   `MFD_NOEXEC_SEAL` is requested, which is how a kernel without
///   support for these flags reports them.
/// - Any other error of reading process memory or creating the file.
pub(crate) fn sys_memfd_create(request: UNotifyEventRequest) -> ScmpNotifResp {
    syscall_handler!(request, |request: UNotifyEventRequest| {
        const NAME_MAX: usize = 255;
        const MFD_NAME_PREFIX: &[u8] = b"/memfd:"; // The slash is not included in the limit.
        const MFD_NAME_PREFIX_LEN: usize = MFD_NAME_PREFIX.len() - 1;
        // Maximum length of the name, excluding the terminating NUL byte.
        const MFD_NAME_MAX_LEN: usize = NAME_MAX - MFD_NAME_PREFIX_LEN;
        // Room for a name of maximum length plus its terminating NUL byte.
        const MFD_NAME_BUF_LEN: usize = MFD_NAME_MAX_LEN + 1;

        let req = request.scmpreq;
        let addr = req.data.args[0];
        if addr == 0 {
            // SAFETY: Return EFAULT for NULL name.
            return Err(Errno::EFAULT);
        }

        // Query all the sandbox options we need up front,
        // so the sandbox lock may be released before the fd is created.
        let sandbox = request.get_sandbox();
        let check = sandbox.enabled(Capability::CAP_CREATE);
        let force_cloexec = sandbox.force_cloexec();
        let force_rand_fd = sandbox.force_rand_fd();
        let restrict_memfd = !sandbox.allow_unsafe_memfd();

        // The flags argument of memfd_create(2) is an unsigned int,
        // therefore truncating the 64-bit seccomp argument is intended.
        #[allow(clippy::cast_possible_truncation)]
        let mut flags = req.data.args[1] as libc::c_uint;
        if restrict_memfd {
            // SAFETY: Drop the executable flag and seal as nonexecutable.
            flags &= !MFD_EXEC;
            flags |= MFD_NOEXEC_SEAL;
        }

        // The buffer is zero-initialized and one byte larger than the
        // longest permitted name, so that a name of exactly
        // MFD_NAME_MAX_LEN bytes still finds its NUL terminator.
        let mut buf = [0u8; MFD_NAME_BUF_LEN];
        request.read_mem(&mut buf, addr)?;

        // SAFETY: If buffer has no null byte, return EINVAL as the
        // name exceeds the limit of memfd_create(2).
        let name = CStr::from_bytes_until_nul(&buf).or(Err(Errno::EINVAL))?;

        // SAFETY: If name starts with `syd', return EINVAL as these
        // memory file descriptors are for Syd's internal use.
        if name.to_bytes().starts_with(b"syd") {
            return Err(Errno::EINVAL);
        }

        if check {
            // Create sandboxing is on:
            // Check for access by prepending the memfd prefix to the name.
            let mut path = XPathBuf::from(MFD_NAME_PREFIX);
            path.append_bytes(name.to_bytes());

            // Unused when request.is_some()
            let process = RemoteProcess::new(request.scmpreq.pid());

            // Any sandbox error is reported as EACCES to the process.
            sandbox_path(
                Some(&request),
                &sandbox,
                &process,
                &path,
                Capability::CAP_CREATE,
                false,
                "memfd_create",
            )
            .or(Err(Errno::EACCES))?;
        }
        drop(sandbox); // release the read-lock.

        // Set CLOEXEC for our fd always, and
        // Set CLOEXEC for remote fd as necessary.
        let cloexec = force_cloexec || (flags & MFD_CLOEXEC != 0);
        flags |= MFD_CLOEXEC;

        // Access granted, emulate call and return the fd to the process.
        match safe_memfd_create(name.to_bytes_with_nul(), flags) {
            Ok(fd) => request.send_fd(fd, cloexec, force_rand_fd),
            Err(Errno::EINVAL) if flags & (MFD_EXEC | MFD_NOEXEC_SEAL) != 0 => {
                // Return ENOSYS rather than EINVAL if MFD_EXEC or
                // MFD_NOEXEC_SEAL is unsupported by the kernel.
                // NOTE: This may still hide another invalid flag
                // combination given together with these flags.
                Err(Errno::ENOSYS)
            }
            // Without the flags above EINVAL cannot be caused by missing
            // kernel support for them, so report the error unchanged.
            Err(errno) => Err(errno),
        }
    })
}
