#![allow(clippy::undocumented_unsafe_blocks)]

use std::{
    env, mem,
    os::{
        fd::{AsRawFd, FromRawFd, OwnedFd, RawFd},
        unix::ffi::OsStrExt,
    },
    ptr,
};

use btoi::btoi;
use libloading::os::unix::Symbol;
use nix::{
    errno::Errno,
    libc,
    sys::signal::{kill, sigprocmask, SigSet, SigmaskHow, Signal},
    unistd::{close, getpid, read, write},
};

use crate::{
    caps,
    config::*,
    fs::retry_on_eintr,
    safe_drop_cap,
    unshare::{
        error::ErrorCode as Err,
        run::{ChildInfo, Exe},
    },
};

unsafe fn fail_errno(code: Err, errno: i32) -> ! {
    let msg = match code {
        Err::CapSet => c"syd: capset error".as_ptr(),
        Err::Exec => c"syd: exec error".as_ptr(),
        Err::ParentDeathSignal => c"syd: parent-death-signal error".as_ptr(),
        Err::PreExec => c"syd: pre-exec error".as_ptr(),
        Err::ProcessStop => c"syd: error stopping process".as_ptr(),
        Err::ResetSignal => c"syd: error reseting signals".as_ptr(),
        Err::Seccomp => c"syd: seccomp error".as_ptr(),
        Err::SeccompSendFd => c"syd: seccomp send notify-fd error".as_ptr(),
        Err::SeccompWaitFd => c"syd: seccomp wait for notify-fd error".as_ptr(),
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        Err::SetTSC => c"syd: set-tsc error".as_ptr(),
    };
    Errno::set_raw(errno);
    libc::perror(msg as *const libc::c_char);
    libc::_exit(errno);
}

/// Terminates the child with the error code `$error`, using the current `errno`.
///
/// `errno` is captured first, before `$error` is evaluated, and is then
/// handed to `fail_errno`, which does not return. The `$child` argument is
/// accepted but is not used by the expansion.
macro_rules! fail_safe {
    ($child:expr, $error:expr) => {{
        let saved_errno = Errno::last_raw();
        unsafe { fail_errno($error, saved_errno) }
    }};
}

/// Terminates the child with the error code `$error` and an explicit `$errno`.
///
/// Both expressions are evaluated left to right and passed to `fail_errno`,
/// which does not return. The `$child` argument is accepted but is not used
/// by the expansion.
macro_rules! fail_errno_safe {
    ($child:expr, $error:expr, $errno:expr) => {{
        let (code, raw_errno) = ($error, $errno);
        unsafe { fail_errno(code, raw_errno) }
    }};
}

/// Child-side entry point; as the name and the `extern "C"` signature
/// suggest, it is meant to be run in the new process created by `clone`.
///
/// `arg` must be a pointer that originated from a `Box<ChildInfo>`; ownership
/// is taken back here with `Box::from_raw`.
///
/// Steps, in order:
///
/// 1. Close file descriptors that must not reach the sandbox process:
///    `proc_close()` and the descriptor named by the `ENV_LOG_FD` variable.
/// 2. Remove every `SYD_*` environment variable except `SYD_TEST_*`.
/// 3. Depending on `child.cfg`: set the parent-death signal, reset the signal
///    mask and dispositions, run the `pre_exec` callback, deny TSC access
///    (x86/x86_64 only) and stop the process with `SIGSTOP`.
/// 4. If a seccomp filter is present: load it, write the notify fd number to
///    the parent through the pipe and wait for a one-byte acknowledgement.
/// 5. Drop `CAP_SYS_PTRACE` unless `child.cfg.keep` is set, then call
///    `caps::securebits::set_keepcaps(true)`.
/// 6. Run the target: either call `syd_main` from the loaded library and exit
///    with its return value, or `execvp` the program.
///
/// The function does not return normally: every path ends in `_exit` (directly
/// or through `fail_errno`), in a successful `execvp`, or in the
/// `unreachable!` panic of the acknowledgement check.
#[allow(clippy::cognitive_complexity)]
pub extern "C" fn child_after_clone(arg: *mut libc::c_void) -> libc::c_int {
    // SAFETY: Do not leak the static file descriptors to the sandbox process.
    proc_close();

    // SAFETY: Do not leak the log file descriptor to the sandbox process.
    // The variable is parsed as a decimal fd number; an unset or
    // unparsable value means there is nothing to close.
    let log_fd = if let Some(fd) = env::var_os(ENV_LOG_FD) {
        btoi::<RawFd>(fd.as_bytes()).ok()
    } else {
        None
    };
    if let Some(log_fd) = log_fd {
        // Best effort: a close error is deliberately ignored.
        let _ = close(log_fd);
    }

    // SAFETY: Clean Syd environment variables from process environment.
    // Note, we have just used ENV_LOG_FD above and do not need it anymore.
    // Variables prefixed with SYD_TEST_ are kept.
    for (key, _) in env::vars_os() {
        if key.as_bytes().starts_with(b"SYD_") && !key.as_bytes().starts_with(b"SYD_TEST_") {
            env::remove_var(key);
        }
    }

    // SAFETY: arg is a valid ChildInfo structure.
    let child: Box<ChildInfo> = unsafe { Box::from_raw(arg as *mut ChildInfo) };

    // We'll write seccomp notify fd to the second pipe,
    // and read the acknowledgement notification from
    // the first pipe.
    let (pipe_ro, pipe_rw) = (child.seccomp_pipefd.0 .0, child.seccomp_pipefd.1 .1);

    // Close the unused ends of the pipes.
    drop(child.seccomp_pipefd.0 .1);
    drop(child.seccomp_pipefd.1 .0);

    // Optionally request that `sig` is delivered to us when the parent dies.
    if let Some(&sig) = child.cfg.death_sig.as_ref() {
        if let Err(errno) = Errno::result(unsafe {
            libc::prctl(libc::PR_SET_PDEATHSIG, sig as libc::c_ulong, 0, 0, 0)
        }) {
            fail_errno_safe!(child, Err::ParentDeathSignal, errno as i32);
        }
    }

    if child.cfg.restore_sigmask {
        // Reset blocking signals.
        // Step 1: Reset the signal mask using pthread_sigmask.
        // Note: the return value of pthread_sigmask is not checked here.
        unsafe {
            let mut sigmask: libc::sigset_t = mem::zeroed();
            libc::sigemptyset(&mut sigmask);
            libc::pthread_sigmask(libc::SIG_SETMASK, &sigmask, ptr::null_mut());
        }
        // Step 2: Unblock all signals using sigprocmask.
        let sigmask = SigSet::all();
        if let Err(errno) = sigprocmask(SigmaskHow::SIG_UNBLOCK, Some(&sigmask), None) {
            fail_errno_safe!(child, Err::ResetSignal, errno as i32);
        }

        // Reset all signals to their default dispositions.
        if let Err(errno) = crate::reset_signals() {
            fail_errno_safe!(child, Err::ResetSignal, errno as i32);
        }
    }

    // Run the caller-supplied pre-exec hook, if any; its error aborts the child.
    if let Some(callback) = &child.pre_exec {
        if let Err(errno) = callback() {
            fail_errno_safe!(child, Err::PreExec, errno as i32);
        }
    }

    // x86 only: with PR_TSC_SIGSEGV, timestamp-counter reads raise SIGSEGV.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    if child.cfg.deny_tsc {
        if let Err(errno) =
            Errno::result(unsafe { libc::prctl(libc::PR_SET_TSC, libc::PR_TSC_SIGSEGV) })
        {
            fail_errno_safe!(child, Err::SetTSC, errno as i32);
        }
    }

    if child.cfg.stop {
        // Stop the process to give the parent a chance to seize us and set ptrace options.
        // This must happen _before_ loading the seccomp filter.
        if let Err(errno) = kill(getpid(), Signal::SIGSTOP) {
            fail_errno_safe!(child, Err::ProcessStop, errno as i32);
        }
    }

    if let Some(seccomp_filter) = child.seccomp_filter {
        // Load the seccomp filter.
        // On failure, prefer the raw return code carried by the error
        // (made positive); otherwise fall back to the current errno.
        if let Err(scmp_err) = seccomp_filter.load() {
            fail_errno_safe!(
                child,
                Err::Seccomp,
                scmp_err
                    .sysrawrc()
                    .map(|errno| errno.abs())
                    .unwrap_or_else(|| Errno::last() as i32)
            );
        }

        // Get seccomp notification fd.
        let seccomp_fd = match seccomp_filter.get_notify_fd() {
            Ok(fd) => {
                // SAFETY: get_notify_fd returns a valid FD.
                unsafe { OwnedFd::from_raw_fd(fd) }
            }
            Err(scmp_err) => fail_errno_safe!(
                child,
                Err::Seccomp,
                scmp_err
                    .sysrawrc()
                    .map(|errno| errno.abs())
                    .unwrap_or_else(|| Errno::last() as i32)
            ),
        };

        // Write the value of the seccomp notify fd to the pipe.
        // Handle partial writes and interrupts.
        // EOF means parent died before reading.
        // The fd number is sent as the little-endian bytes of a RawFd.
        let fd = seccomp_fd.as_raw_fd().to_le_bytes();
        let mut nwrite = 0;
        while nwrite < fd.len() {
            #[allow(clippy::arithmetic_side_effects)]
            match retry_on_eintr(|| write(&pipe_rw, &fd[nwrite..])) {
                Ok(0) => {
                    // Parent died before reading.
                    // This should ideally never happen.
                    fail_errno_safe!(child, Err::SeccompSendFd, Errno::EIO as i32);
                }
                Ok(n) => nwrite += n,
                Err(errno) => fail_errno_safe!(child, Err::SeccompSendFd, errno as i32),
            }
        }

        // Close the write end of the pipe.
        drop(pipe_rw);

        // Wait for the parent to get the file descriptor.
        // Handle interrupts.
        // Partial read is not possible.
        // EOF means parent died before writing to the pipe.
        // The expected acknowledgement is a single byte with value 42.
        let mut buf = [0u8; 1];
        match retry_on_eintr(|| read(&pipe_ro, &mut buf[..])) {
            Ok(0) => {
                // Parent died before writing.
                // This should ideally never happen.
                fail_errno_safe!(child, Err::SeccompWaitFd, Errno::EIO as i32);
            }
            Ok(1) if buf[0] == 42 => {
                // Parent received seccomp fd successfully.
                // We can go ahead and close our copy now.
            }
            // Any other byte value is treated as a bug and panics.
            Ok(_) => unreachable!("BUG: The meaning of life is not {:#x}!", buf[0]),
            Err(errno) => fail_errno_safe!(child, Err::SeccompWaitFd, errno as i32),
        }

        // Close our copy of the seccomp-notify fd.
        // Parent process has already acknowledged that
        // it has received a copy of this fd.
        drop(seccomp_fd);

        // Release resources for seccomp BPF filter.
        // Memory allocation/deallocation is OK here
        // now that we have transferred over the
        // seccomp-notify fd to the parent process.
        // Otherwise we'd risk breaking Memory sandboxing.
        drop(seccomp_filter);

        // Close the read end of the pipe.
        drop(pipe_ro);
    } else {
        // Close unused ends of the pipes.
        drop(pipe_ro);
        drop(pipe_rw);
    }

    // Drop CAP_SYS_PTRACE late as Syd may need it.
    // `fail_safe!` reports whatever errno currently holds.
    if !child.cfg.keep && safe_drop_cap(caps::Capability::CAP_SYS_PTRACE).is_err() {
        fail_safe!(child, Err::CapSet);
    }
    if caps::securebits::set_keepcaps(true).is_err() {
        fail_safe!(child, Err::CapSet);
    }

    // TODO:
    // Set the new secure bits:
    // 1. SECBIT_EXEC_RESTRICT_FILE
    // 2. SECBIT_EXEC_DENY_INTERACTIVE
    // upon user configuration.
    // See: https://docs.kernel.org/userspace-api/check_exec.html
    // Note, we already use AT_EXECVE_CHECK.

    match child.exe {
        // Library target: resolve `syd_main`, call it and exit with its
        // return value. A missing symbol exits with EINVAL.
        Exe::Library(lib) => unsafe {
            let fun: Symbol<unsafe extern "C" fn() -> i32> = match lib.get(b"syd_main") {
                Ok(fun) => fun,
                Err(_) => nix::libc::_exit(nix::libc::EINVAL),
            };
            nix::libc::_exit(fun());
        },
        // Program target: execvp only returns on failure, in which case
        // the current errno is reported as an exec error.
        Exe::Program((filename, ref args)) => {
            let args = &args[..];
            unsafe { libc::execvp(filename, args.as_ptr()) };
            fail_safe!(child, Err::Exec);
        }
    }
}
