#!/bin/bash
#
# air-tailscale — strict allowlist wrapper around /usr/bin/tailscale for the
# `cast` service user. Invoked via sudo from cast-server; sudoers restricts
# that service user to exactly this path (/etc/sudoers.d/air-vpn).
#
# Why a wrapper?
# --------------
# sudoers argument matching cannot express "permit `tailscale up` with ANY
# args EXCEPT --ssh". `tailscale up --ssh` enables SSH-over-tailnet, which
# would give any peer on the tailnet a root shell — game-over for the drone.
# This wrapper validates argv before exec under a strict allowlist and refuses
# anything outside it.
#
# Subcommand allowlist:
#   up      — accepts only the flags allowlisted in validate_up_args below.
#   down    — no extra arguments permitted.
#   status  — no extra arguments permitted.
#   logout  — no extra arguments permitted.
#   ip      — only `-4` permitted (matches what cast-server queries).
#
# All other subcommands (set, funnel, serve, ssh, file, debug, ...) and all
# unknown flags are refused with a non-zero exit and a clear stderr message.
#
# The final argv is echoed to stderr before exec for audit purposes; sudo
# captures stderr to the journal under the cast.service unit.
#

# Strict mode: abort on an unhandled command failure, on expansion of an
# unset variable, and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# PROG prefixes every diagnostic this wrapper prints; TAILSCALE is the
# absolute path of the binary exec'd once argv has been validated.
PROG=air-tailscale
TAILSCALE=/usr/bin/tailscale

# Report a fatal error and stop.
# Arguments: the message words; they are joined via "$*".
# Outputs:   "<PROG>: <message>" on stderr.
# Exits:     always terminates the calling shell with status 2.
die() {
    printf '%s: %s\n' "${PROG}" "$*" >&2
    exit 2
}

# A subcommand is mandatory: bail out before touching $1 when argv is empty.
(( $# >= 1 )) || die "missing subcommand"

# Peel the subcommand off the front; "$@" now holds only its arguments.
subcmd="$1"
shift

# Credential file the `up` subcommand loads TS_AUTHKEY from. cast-server
# writes it as the `air` user with 0600 perms; the `up` branch below reads
# it, unlinks it, and exports the value as TS_AUTHKEY for the tailscale
# child, so a key staged this way stays out of argv and is gone from disk
# before tailscale runs. (A key passed as `--authkey=...` is still accepted
# by validate_up_args and does travel in argv.)
# Kept in sync with cast-server/src/services/vpn.rs::TS_AUTHKEY_FILE.
TS_AUTHKEY_FILE=/run/air/ts-authkey

# Validate `up` flags via an explicit allowlist.
#
# Arguments: the argv that would follow `tailscale up`.
# Returns:   0 when every argument is allowlisted.
# Exits:     on the first offending argument it calls die(), which prints
#            the reason on stderr and terminates the script.
#
# Only the `--flag` / `--flag=value` spellings listed below are accepted.
# Every other dash-prefixed word — including single-dash spellings such as
# `-ssh=true`, which Go-style flag parsers treat the same as `--ssh=true` —
# is refused and reported as a flag, so the audit log names the real reason.
validate_up_args() {
    # Pin the C locale for the duration of this function: bracket ranges
    # such as [A-Za-z] are only guaranteed to mean "ASCII letters" there,
    # and locale variables may be inherited from the caller.
    local LC_ALL=C
    local arg val host
    for arg in "$@"; do
        case "${arg}" in
            --accept-routes|--reset)
                ;;
            --authkey=*)
                val="${arg#--authkey=}"
                # Alphanumeric + dash + underscore only — no shell metas, no
                # whitespace, no quoting tricks. The {20,256} bound rejects
                # both truncated and oversized values outright.
                if ! [[ "${val}" =~ ^[A-Za-z0-9_-]{20,256}$ ]]; then
                    die "--authkey value malformed (must be 20–256 chars of [A-Za-z0-9_-])"
                fi
                ;;
            --hostname=*)
                val="${arg#--hostname=}"
                if ! [[ "${val}" =~ ^[a-zA-Z0-9-]{1,63}$ ]]; then
                    die "--hostname must match ^[a-zA-Z0-9-]{1,63}$"
                fi
                ;;
            --login-server=*)
                val="${arg#--login-server=}"
                # Must be an https URL pointing at a known control plane.
                # The authority part may only contain [A-Za-z0-9._-], so
                # userinfo (`@`) and explicit ports (`:`) are rejected here.
                if ! [[ "${val}" =~ ^https://[A-Za-z0-9._-]+(/.*)?$ ]]; then
                    die "--login-server must be an https:// URL"
                fi
                # Extract the hostname from the validated URL.
                host="${val#https://}"
                host="${host%%/*}"  # strip path
                # Defensive only: the regex above already refuses ':' in the
                # authority, so this never removes anything today.
                host="${host%%:*}"
                # Anchored domain check: prevent evil-headscale.net from matching.
                case "${host}" in
                    tailscale.com|*.tailscale.com) ;;
                    headscale.net|*.headscale.net) ;;
                    *) die "--login-server host '${host}' not in domain allowlist (tailscale.com, headscale.net). For self-hosted Headscale on a custom domain, edit this wrapper." ;;
                esac
                ;;
            # Explicit-deny short-circuits for the highest-impact flags so
            # the audit log says exactly *why* a call was rejected. The
            # catch-all flag branch below would refuse them anyway.
            --ssh|--ssh=*|-ssh|-ssh=*)
                die "flag --ssh is not permitted"
                ;;
            --exit-node|--exit-node=*)
                die "flag --exit-node is not permitted"
                ;;
            --exit-node-allow-lan-access|--exit-node-allow-lan-access=*)
                die "flag --exit-node-allow-lan-access is not permitted"
                ;;
            --advertise-exit-node|--advertise-exit-node=*)
                die "flag --advertise-exit-node is not permitted"
                ;;
            --advertise-routes|--advertise-routes=*)
                die "flag --advertise-routes is not permitted"
                ;;
            --operator|--operator=*)
                die "flag --operator is not permitted"
                ;;
            --shields-up|--shields-up=*)
                die "flag --shields-up is not permitted"
                ;;
            # Anything else that starts with a dash and has at least one
            # more character is a flag we do not know. A lone "-" is left
            # to the positional branch.
            --*|-?*)
                die "flag '${arg}' is not in the allowlist"
                ;;
            *)
                die "positional argument '${arg}' is not permitted"
                ;;
        esac
    done
}

# Succeeds only when open descriptor $1 refers to a regular file whose
# kernel-resolved path (the target of the /proc/self/fd/<n> link) is
# exactly $2. A descriptor that was opened through a symlink resolves to
# the link's target instead, so this detects a planted symlink from the
# descriptor itself rather than via a racy check-then-open on the path.
# Relies on Linux procfs being mounted.
fd_is_plain_file_at() {
    local fd_link="/proc/self/fd/$1" resolved
    [ -f "${fd_link}" ] || return 1
    resolved="$(readlink "${fd_link}")" || return 1
    [ "${resolved}" = "$2" ]
}

# Per-subcommand validation. Each arm either returns normally (argv is
# acceptable and the script proceeds to the audit log + exec below) or calls
# die(), which terminates the script.
case "${subcmd}" in
    up)
        validate_up_args "$@"
        # If a credential file has been staged, load it into TS_AUTHKEY
        # for the child tailscale process and unlink the file before the
        # exec, so the plaintext key is gone from disk by the time
        # tailscale runs.
        #
        # The read+unlink runs under an exclusive flock on a dedicated
        # lockfile so that two concurrent `air-tailscale up` invocations
        # cannot both read the same key before either has removed it.
        # The lock is released right after the unlink; it is NOT held
        # across the `tailscale up` call itself.
        TS_AUTHKEY_LOCK=/run/air/ts-authkey.lock
        # /run/air is staged by tmpfiles.d (/etc/tmpfiles.d/air.conf)
        # at boot. If it's missing we'd hit a cryptic "No such file or
        # directory" on the redirects below; catch it early with an
        # actionable message instead.
        if [ ! -d /run/air ]; then
            die "/run/air not present — run 'systemd-tmpfiles --create' or reboot \
so tmpfiles.d stages the directory before invoking this wrapper"
        fi
        # We run as root, but the credential file in this directory is
        # written by an unprivileged user, so names in it must not be
        # trusted: `>>`, `<` and chmod all follow symlinks, and a planted
        # link would otherwise let the caller make root chmod or read an
        # arbitrary file. Reject an obvious symlink up front (cheap, but
        # racy on its own); the authoritative check is on the descriptor
        # after the open below.
        if [ -L "${TS_AUTHKEY_LOCK}" ]; then
            die "${TS_AUTHKEY_LOCK} is a symlink — refusing to use it as a lockfile"
        fi
        # Create the lockfile 0600 if it is missing. `: >>` never
        # replaces an existing inode, which matters: concurrent
        # invocations must flock the SAME inode to exclude each other.
        ( umask 0077; : >>"${TS_AUTHKEY_LOCK}" ) \
            || die "could not stage ${TS_AUTHKEY_LOCK}"
        # Open the lockfile on a shell-allocated fd. `>>` (append) is
        # used so opening never truncates.
        exec {lock_fd}>>"${TS_AUTHKEY_LOCK}"
        fd_is_plain_file_at "${lock_fd}" "${TS_AUTHKEY_LOCK}" \
            || die "${TS_AUTHKEY_LOCK} is not a plain regular file — refusing to use it as a lockfile"
        # Fix up the mode of a pre-existing lockfile (umask only applies
        # on creation). chmod goes through the verified descriptor, not
        # the path, so it cannot be redirected after the check above.
        chmod 0600 "/proc/self/fd/${lock_fd}" \
            || die "could not chmod 0600 ${TS_AUTHKEY_LOCK}"
        # `-w 30` bounds the wait so a stuck holder cannot wedge this
        # invocation indefinitely.
        flock -x -w 30 "${lock_fd}" || die "could not acquire ts-authkey lock within 30s"
        if [ -s "${TS_AUTHKEY_FILE}" ]; then
            # Same descriptor-based check for the credential file: open
            # first, verify what was actually opened, then read from the
            # descriptor. If the file is replaced between the open and the
            # check, this invocation dies and the caller has to retry.
            exec {key_fd}<"${TS_AUTHKEY_FILE}"
            fd_is_plain_file_at "${key_fd}" "${TS_AUTHKEY_FILE}" \
                || die "${TS_AUTHKEY_FILE} is not a plain regular file — refusing to read it"
            # Strip CR/LF so a trailing newline does not become part of
            # the key.
            TS_AUTHKEY="$(tr -d '\r\n' <&"${key_fd}")"
            exec {key_fd}<&-
            rm -f "${TS_AUTHKEY_FILE}"
            if [ -n "${TS_AUTHKEY}" ]; then
                export TS_AUTHKEY
            else
                unset TS_AUTHKEY
            fi
        fi
        # Close the fd → release the flock. Holding it across the
        # `exec tailscale up` below would block the next invocation
        # for the entire duration of the network call.
        exec {lock_fd}>&-
        ;;
    down|status|logout)
        if [ $# -ne 0 ]; then
            die "subcommand '${subcmd}' takes no arguments"
        fi
        ;;
    ip)
        # cast-server only ever calls `tailscale ip -4`. Pin it.
        if [ $# -ne 1 ] || [ "${1:-}" != "-4" ]; then
            die "subcommand 'ip' only accepts the single arg '-4'"
        fi
        ;;
    *)
        die "subcommand '${subcmd}' not permitted"
        ;;
esac

# Audit trail: log the argv we are about to exec on stderr, which is
# inherited from the caller (per the file header, that ends up in the
# journal).
#
# Each argument is printed with printf %q so a value containing spaces or
# shell metacharacters is escaped instead of being ambiguous in the log
# line. The one exception is `--authkey=...`: its value is a credential, so
# the log shows `--authkey=<redacted>` rather than writing the secret out.
# Only the log line is redacted — the argv handed to tailscale below is
# unchanged.
{
    printf '%s: exec %s %s' "${PROG}" "${TAILSCALE}" "${subcmd}"
    for audit_arg in "$@"; do
        case "${audit_arg}" in
            --authkey=*)
                printf ' %s' '--authkey=<redacted>'
                ;;
            *)
                printf ' %q' "${audit_arg}"
                ;;
        esac
    done
    printf '\n'
} >&2

# Replace this shell with tailscale; nothing after this line runs.
exec "${TAILSCALE}" "${subcmd}" "$@"
