#!/bin/bash
#
# air-ota-apply
#
# Privileged OTA application helper. Runs as root, invoked ONLY by the
# oneshot unit /etc/systemd/system/air-ota-apply.service, which itself
# is started by cast-server via:
#
#     sudo -n /bin/systemctl start --wait air-ota-apply.service
#
# (allowed by /etc/sudoers.d/air-ota, exact argv).
#
# # Why it exists
#
# cast.service runs under ProtectSystem=strict with ReadWritePaths
# limited to /var/lib/cast, /var/log/cast, and /etc/air. The apply-
# bundle path in services::ota needs to write /usr/local/bin/cast-server
# and its .prev sibling; from inside the sandbox those paths are RO
# (EROFS) regardless of uid, because ProtectSystem uses mount-namespace
# remounting that is inherited by sudo'd children.
#
# Punching a hole in cast.service's sandbox (widening ReadWritePaths to
# include /usr/local/bin) would let any post-exploitation from cast-
# server plant arbitrary binaries in /usr/local/bin. Instead we spawn
# a fresh, unsandboxed oneshot unit and let IT do the swap.
#
# # Contract
#
# Input: a staging dir path at /var/lib/cast/updates/staging/<version>/
# containing at minimum `cast-server` (an ELF for this device arch). Optionally
# `web.tar` (a tar of the web/dist tree) and `migrations.tar`.
#
# The exact staging dir is read from the marker file
# /var/lib/cast/updates/staging/PENDING, which contains a single line:
# the absolute path of the staging directory to apply. cast-server
# writes this marker atomically (via `mv`) after verifying the bundle
# signature and unpacking it; the helper refuses to run without it.
#
# Output on success: the new binary is in place at /usr/local/bin/
# cast-server, the previous binary is at .prev for rollback, and the
# marker file is deleted. cast-server then restarts via the existing
# AIR_OTA_CTL sudoers entry.
#
# Output on failure: exit non-zero with a short diagnostic on stderr.
# The oneshot unit propagates that to cast-server via the --wait exit
# code. The caller is responsible for cleanup of the staging dir on
# persistent failure; we leave it in place so a human operator can
# inspect what we refused to apply.
#
# # Safety invariants
#
# 1. Refuse to touch any path outside the expected layout. The staging
#    dir path is validated against a strict regex before use.
# 2. Never rename a wrong-architecture binary into place (a stray x86_64
#    build on Pi, or aarch64 build on N100, would turn OTA into a brick).
# 3. Never leave the operator without at least one working binary —
#    the rename sequence is old→.prev then new→current, NOT the other
#    way around, so at any interrupt the device has either the old
#    binary (name unchanged) or both (.prev + current, new installed).
# 4. The marker file is consumed on success. On retry without a fresh
#    marker the helper is a no-op.

# Strict mode: abort on unhandled failures, on unset variables, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Fixed on-disk layout this helper operates on.
MARKER="/var/lib/cast/updates/staging/PENDING"
BIN_TARGET="/usr/local/bin/cast-server"
BIN_PREV="${BIN_TARGET}.prev"
WEB_DIR="/var/lib/cast/web"
WEB_PREV="${WEB_DIR}.prev"
VERSION_FILE="/etc/air/version"
SOURCE_COMMIT_FILE="/etc/air/source-commit"

# Temp staging dirs created later by the script; empty means "not created".
# The EXIT handler removes whatever these point at.
ROOT_STAGE=
TREE_STAGE=

# Parent directory for every temp file/dir: AIR_OTA_TMPDIR wins, then TMPDIR,
# then /var/tmp. One trailing slash is stripped; if that leaves nothing
# (e.g. the value was "/"), fall back to /var/tmp.
OTA_TMP_PARENT="${AIR_OTA_TMPDIR:-${TMPDIR:-/var/tmp}}"
OTA_TMP_PARENT="${OTA_TMP_PARENT%/}"
: "${OTA_TMP_PARENT:=/var/tmp}"

# Write one diagnostic line to stderr, prefixed with the helper's tag.
# All arguments are joined with spaces into a single message.
log() {
    printf '%s\n' "[air-ota-apply] $*" >&2
}

# EXIT handler: remove the temp staging directories recorded in ROOT_STAGE
# and TREE_STAGE. A directory is only deleted when its path sits directly
# under $OTA_TMP_PARENT with the air-ota- prefix; anything else is logged
# and left alone so a corrupted variable can never trigger an arbitrary
# recursive delete as root.
cleanup() {
    local stage_dir
    for stage_dir in "${ROOT_STAGE:-}" "${TREE_STAGE:-}"; do
        if [[ -z "$stage_dir" ]]; then
            continue
        fi
        if [[ "$stage_dir" == "$OTA_TMP_PARENT"/air-ota-* ]]; then
            rm -rf "$stage_dir"
        else
            log "refusing cleanup outside OTA temp parent: $stage_dir"
        fi
    done
}
trap cleanup EXIT

# Make sure the temp parent directory ($OTA_TMP_PARENT) exists.
#
# An already-existing directory is left exactly as it is. `install -d` would
# otherwise re-apply mode 1777 and root:root ownership to it, which silently
# turns an operator-chosen private directory (AIR_OTA_TMPDIR / TMPDIR) into a
# world-writable one. Only a missing directory is created, with the same
# sticky world-writable mode /var/tmp conventionally has.
#
# Returns: 0 when the directory exists afterwards, non-zero when creation
# fails (including when the path exists but is not a directory).
ensure_ota_tmp_parent() {
    if [ -d "$OTA_TMP_PARENT" ]; then
        return 0
    fi
    install -d -m 1777 -o root -g root "$OTA_TMP_PARENT"
}

# ota_mktemp_file LABEL
#
# Create a fresh temp file named air-ota-<LABEL>.XXXXXX under
# $OTA_TMP_PARENT (ensuring that parent exists first) and print its path
# on stdout.
ota_mktemp_file() {
    local template="${OTA_TMP_PARENT}/air-ota-${1}.XXXXXX"
    ensure_ota_tmp_parent
    mktemp "$template"
}

# ota_mktemp_dir LABEL
#
# Create a fresh temp directory named air-ota-<LABEL>.XXXXXX under
# $OTA_TMP_PARENT (ensuring that parent exists first) and print its path
# on stdout. The air-ota- prefix is what the EXIT cleanup handler accepts.
ota_mktemp_dir() {
    local template="${OTA_TMP_PARENT}/air-ota-${1}.XXXXXX"
    ensure_ota_tmp_parent
    mktemp -d "$template"
}

# validate_plain_tar TAR_PATH LABEL
#
# Pre-extraction gate for archives that this helper unpacks as root.
# Accepts the archive only when ALL of the following hold:
#   * `tar -tf` and `tar -tvf` can list it;
#   * no member name is absolute, contains `../`, or is/ends in `..`;
#   * every member is a regular file or a directory. Symlinks and hardlinks
#     are refused, and so are device nodes, fifos and any other special
#     type: extracted by root, a device node would hand raw-device access
#     to whoever ends up owning the extracted tree.
#
# Arguments: $1 - path to the archive
#            $2 - short name used in log messages
# Returns:   0 when the archive is acceptable, 1 otherwise (reason logged).
#
# Listings go to temp files rather than through a pipeline so the verdict
# does not depend on the caller having `pipefail` enabled.
validate_plain_tar() {
    local tar_path="$1"
    local label="$2"
    local list types reason=""

    list="$(ota_mktemp_file tar-list)" || list=""
    types="$(ota_mktemp_file tar-types)" || types=""

    if [ -z "$list" ] || [ -z "$types" ]; then
        reason="could not create temp files for archive listing"
    elif ! tar -tf "$tar_path" >"$list" 2>/dev/null; then
        reason="archive listing failed"
    elif grep -Eq '(^/|\.\./|(^|/)\.\.$)' "$list"; then
        reason="absolute or traversal-style path"
    elif ! tar -tvf "$tar_path" >"$types" 2>/dev/null; then
        reason="verbose archive listing failed"
    elif grep -Eq '^[lh]' "$types"; then
        # First column of the verbose listing is the mode string; its first
        # character is the entry type.
        reason="contains symlink/hardlink entries"
    elif grep -Evq '^[-d]' "$types"; then
        reason="contains device, fifo, or other non-regular entries"
    fi

    [ -z "$list" ] || rm -f "$list"
    [ -z "$types" ] || rm -f "$types"

    if [ -n "$reason" ]; then
        log "refusing ${label}: ${reason}"
        return 1
    fi
    return 0
}

# ensure_groups GROUP...
#
# Best-effort creation of each named system group that `getent` does not
# already know. A failing `groupadd` is deliberately ignored: the groups
# are supplementary, and a missing one must not abort the OTA.
ensure_groups() {
    local wanted
    for wanted in "$@"; do
        getent group "$wanted" >/dev/null 2>&1 && continue
        groupadd --system "$wanted" 2>/dev/null || true
    done
}

# Create (or normalise) the non-login `cast` service account.
#
#   * The `cast` group is created when missing; a failure here is fatal.
#   * The `cast` user is created when missing; an existing user only has
#     its home and shell re-pointed, best-effort.
#   * The account is then added, best-effort, to the supplementary groups
#     it needs: dialout + video everywhere (serial and camera access), plus
#     render on x86_64/amd64 (render-node access for VAAPI encode).
ensure_cast_account() {
    local -a extra_groups=(dialout video)
    local joined

    getent group cast >/dev/null 2>&1 || groupadd --system cast

    if id -u cast >/dev/null 2>&1; then
        usermod --home /var/lib/cast --shell /usr/sbin/nologin cast 2>/dev/null || true
    else
        useradd --system --gid cast --home-dir /var/lib/cast --shell /usr/sbin/nologin cast
    fi

    case "$(device_arch)" in
        x86_64|amd64) extra_groups+=(render) ;;
    esac

    ensure_groups "${extra_groups[@]}"
    # Comma-join the group list for usermod -aG.
    joined="$(IFS=,; printf '%s' "${extra_groups[*]}")"
    usermod -aG "$joined" cast 2>/dev/null || true
}

# Hand the writable state over to the `cast` service account.
#
#   * /var/lib/cast and /var/log/cast are (re)created 0750 cast:cast and
#     their contents chowned recursively, best-effort.
#   * /run/air, when present, becomes 0750 cast:cast, best-effort.
#   * /etc/air is (re)created 0755 root:root; the two secret-bearing files
#     in it, when present, become 0640 root:cast so the service can read
#     but not modify them.
#
# The loop variable is declared local so this function does not clobber
# the global `f` that the top-level install loops also use.
finalize_cast_ownership() {
    local f
    install -d -m 0750 -o cast -g cast /var/lib/cast /var/log/cast
    # Do not follow symlinks: recordings may point at removable media.
    chown -hR cast:cast /var/lib/cast /var/log/cast 2>/dev/null || true
    if [ -d /run/air ]; then
        chown cast:cast /run/air 2>/dev/null || true
        chmod 0750 /run/air 2>/dev/null || true
    fi
    install -d -m 0755 -o root -g root /etc/air
    for f in /etc/air/jwt_secret /etc/air/environment; do
        if [ -e "$f" ]; then
            chown root:cast "$f" 2>/dev/null || true
            chmod 0640 "$f" 2>/dev/null || true
        fi
    done
}

# Print the device architecture string.
#
# A non-empty /etc/air/arch takes precedence (all whitespace stripped);
# without it the kernel's `uname -m` value is used.
device_arch() {
    if [ ! -s /etc/air/arch ]; then
        uname -m
        return
    fi
    tr -d '[:space:]' < /etc/air/arch
}

# Print the substring that `file -b` output must contain for an ELF built
# for this device's architecture.
#
# Returns: 0 with the pattern on stdout for aarch64/arm64 and x86_64/amd64;
#          1 with no output for any other architecture.
expected_file_pattern() {
    local needle=""
    case "$(device_arch)" in
        aarch64|arm64) needle='ARM aarch64' ;;
        x86_64|amd64)  needle='x86-64' ;;
    esac
    [ -n "$needle" ] || return 1
    printf '%s\n' "$needle"
}

# On x86_64/amd64 devices, run /usr/local/sbin/air-n100-cleanup when it is
# installed and executable. A non-zero exit from the helper is logged and
# otherwise ignored. On every other architecture this is a no-op.
run_n100_cleanup_if_available() {
    local helper=/usr/local/sbin/air-n100-cleanup
    case "$(device_arch)" in
        x86_64|amd64) ;;
        *) return 0 ;;
    esac
    [ -x "$helper" ] || return 0
    "$helper" || log "air-n100-cleanup returned $?"
}

# prune_n100_pi_only_sudoers SUDOERS_FILE
#
# On x86_64/amd64 devices, edit SUDOERS_FILE in place to drop the
# AIR_CAMERA_OVERLAY command alias: the `Cmnd_Alias` definition line is
# deleted and the alias name is removed from comma-separated lists. On any
# other architecture, or when the file does not exist, nothing is changed
# and the function returns 0.
prune_n100_pi_only_sudoers() {
    local sudoers="$1"
    local -a strip_overlay=(
        -e '/^Cmnd_Alias[[:space:]]\+AIR_CAMERA_OVERLAY[[:space:]]*=/d'
        -e 's/,[[:space:]]*AIR_CAMERA_OVERLAY//g'
        -e 's/AIR_CAMERA_OVERLAY,[[:space:]]*//g'
    )
    case "$(device_arch)" in
        x86_64|amd64) ;;
        *) return 0 ;;
    esac
    if [ ! -f "$sudoers" ]; then
        return 0
    fi
    sed -i "${strip_overlay[@]}" "$sudoers"
}

# No marker means no pending OTA: succeed without doing anything.
if [ ! -f "$MARKER" ]; then
    log "no marker at $MARKER — nothing to do"
    exit 0
fi

# Read at most 512 bytes and drop CR/LF so the marker is treated as a single
# path regardless of line endings.
STAGING="$(head -c 512 "$MARKER" | tr -d '\r\n')"
# Strict layout: the staging path MUST be exactly one directory level below
# /var/lib/cast/updates/staging/, with an optional trailing slash.
if ! [[ "$STAGING" =~ ^/var/lib/cast/updates/staging/[A-Za-z0-9._-]+/?$ ]]; then
    log "refusing: marker points outside allowed staging prefix: $STAGING"
    exit 2
fi
# The character class above admits "." and "..", which would resolve to the
# staging root itself or to its parent. Both are traversal, not versions.
SIGNED_VERSION="${STAGING%/}"
SIGNED_VERSION="${SIGNED_VERSION##*/}"
case "$SIGNED_VERSION" in
    .|..)
        log "refusing: marker points outside allowed staging prefix: $STAGING"
        exit 2
        ;;
esac
# A symlinked version directory could point anywhere on disk. The trailing
# slash is stripped first because `-L path/` tests the link's target.
# (Only the final component is checked here, not the parent directories.)
if [ -L "${STAGING%/}" ]; then
    log "refusing: staging dir is a symlink: $STAGING"
    exit 2
fi
if [ ! -d "$STAGING" ]; then
    log "staging dir missing: $STAGING"
    exit 3
fi

# Signed-sidecar path. When cast-server staged the original signed
# manifest.json + bundle.tar.xz next to the extracted files, have the
# currently installed, root-owned binary verify them, then unpack the bundle
# into a root-owned temp dir and apply from THERE instead of from the
# cast-writable staging dir. Only on this path are root-level payloads
# (image_tree.tar) honoured later on. Without the sidecars (older binaries
# stage extracted files only) the OTA is limited to binary + web.
#
# NOTE(review): verification and extraction both read the bundle from the
# cast-writable staging dir, one after the other. Whether the file can be
# swapped in between depends on what else can write there while this helper
# runs — confirm, and consider verifying a root-owned copy instead.
SIDECAR_MANIFEST="${STAGING%/}/manifest.json"
SIDECAR_BUNDLE="${STAGING%/}/bundle.tar.xz"
if [ ! -s "$SIDECAR_MANIFEST" ] || [ ! -s "$SIDECAR_BUNDLE" ]; then
    log "signed bundle sidecars missing; root-level image_tree payloads will remain disabled for this OTA"
else
    if [ ! -x "$BIN_TARGET" ]; then
        log "refusing signed sidecar apply: $BIN_TARGET is not executable"
        exit 7
    fi
    # Bounded at 30 s so a hung verifier cannot wedge the oneshot unit.
    timeout 30 "$BIN_TARGET" --ota-verify-staging "$STAGING" || {
        log "refusing signed sidecar apply: manifest or bundle verification failed"
        exit 7
    }
    ROOT_STAGE="$(ota_mktemp_dir root)"
    validate_plain_tar "$SIDECAR_BUNDLE" "bundle.tar.xz" || exit 7
    # Owner, mode, xattrs and ACLs recorded in the archive are all ignored.
    root_extract_flags=(--no-same-owner --no-same-permissions --no-xattrs --no-acls)
    tar -C "$ROOT_STAGE" "${root_extract_flags[@]}" -xf "$SIDECAR_BUNDLE"
    # From here on every payload is read from the root-owned copy.
    STAGING="$ROOT_STAGE"
    log "signed bundle verified and extracted to root-owned staging"
fi

# The bundle must contain a non-empty cast-server binary.
NEW_BIN="${STAGING%/}/cast-server"
if [ ! -s "$NEW_BIN" ]; then
    log "no cast-server in staging dir"
    exit 4
fi

# Newer images run cast.service as this non-login service account instead of
# the interactive SSH `air` user. Create it before any bundle swap so an OTA
# can safely install the new systemd unit + sudoers policy on older devices.
ensure_cast_account

# Architecture sanity — a cross-build that targeted the wrong triple would
# leave the device without a runnable cast-server. This check is mandatory:
# an architecture we have no pattern for is refused outright, and when the
# `file` utility is missing we read the ELF header directly instead of
# skipping the check.
if ! pattern="$(expected_file_pattern)"; then
    log "refusing: unsupported OTA device architecture '$(device_arch)'"
    exit 5
fi
if command -v file >/dev/null 2>&1; then
    # Capture first, then match in-shell: piping into `grep -q` under
    # pipefail can report failure when grep exits before `file` finishes.
    new_bin_desc="$(file -b "$NEW_BIN")" || new_bin_desc=""
    if [[ "$new_bin_desc" != *"$pattern"* ]]; then
        log "refusing: $NEW_BIN is not a $(device_arch) ELF ($new_bin_desc)"
        exit 5
    fi
else
    # Fallback: first 20 header bytes as hex. Bytes 0-3 are the ELF magic,
    # bytes 18-19 are e_machine (little-endian on both supported targets):
    # 0x00B7 = EM_AARCH64, 0x003E = EM_X86_64.
    case "$(device_arch)" in
        aarch64|arm64) want_machine=b700 ;;
        x86_64|amd64)  want_machine=3e00 ;;
        *)             want_machine="" ;;
    esac
    elf_header="$(od -An -v -tx1 -N20 "$NEW_BIN" 2>/dev/null | tr -d ' \n')" || elf_header=""
    if [ -z "$want_machine" ] \
            || [ "${elf_header:0:8}" != "7f454c46" ] \
            || [ "${elf_header:36:4}" != "$want_machine" ]; then
        log "refusing: $NEW_BIN is not a $(device_arch) ELF (header: ${elf_header:-unreadable})"
        exit 5
    fi
fi

# web.tar is optional; when present it must pass the archive pre-scan
# before anything on disk is modified.
NEW_WEB_TAR="${STAGING%/}/web.tar"
if [ -s "$NEW_WEB_TAR" ]; then
    validate_plain_tar "$NEW_WEB_TAR" "web.tar" || exit 6
fi

# Binary swap — arranged so that $BIN_TARGET always names a complete binary,
# old or new, no matter where the sequence is interrupted.
#
# 1. Drop the previous rollback copy and any stale staged file. This also
#    frees their disk space before the new binary is copied in.
# 2. `install ... $BIN_TARGET.new` copies the new binary to a staging name
#    in the same directory, so the final step is a pure rename rather than
#    a cross-device copy. A partial write here is harmless — the staged
#    name is never exec'd.
# 3. `sync` forces the staged file to disk before it is renamed into place.
# 4. Hard-link the current binary to $BIN_PREV. The old binary now has two
#    names and $BIN_TARGET is untouched. If hard links are unavailable,
#    fall back to a copy; either way nothing is moved out of the way.
# 5. `mv -f` renames the staged file over $BIN_TARGET: a single rename(2)
#    that atomically replaces the old entry, leaving the old inode reachable
#    as $BIN_PREV for rollback.
# 6. `sync` again so the directory update is persisted.
#
# Failure at steps 1-4 leaves $BIN_TARGET as the old binary; step 5 either
# happens completely or not at all.
rm -f "$BIN_PREV" "${BIN_TARGET}.new"
install -m 0755 -o root -g root "$NEW_BIN" "${BIN_TARGET}.new"
sync
if [ -e "$BIN_TARGET" ]; then
    ln -f "$BIN_TARGET" "$BIN_PREV" 2>/dev/null \
        || cp -pf "$BIN_TARGET" "$BIN_PREV"
fi
mv -f "${BIN_TARGET}.new" "$BIN_TARGET"
sync
log "binary installed: $BIN_TARGET"

# Optional web tree swap. web.tar is unpacked here rather than at
# stage-bundle time so the old tree stays live right up to the moment it is
# replaced. The new tree is unpacked next to the live one, synced to disk,
# and only then renamed into place; the old tree is kept as $WEB_PREV.
#
# Extraction uses the same hardening flags as the other archives this helper
# unpacks as root: ownership, permission bits, xattrs and ACLs recorded in
# the archive are ignored. On the legacy (unsigned) path web.tar comes from
# the cast-writable staging dir, so nothing in it may dictate the owner or
# mode of files created by root.
if [ -s "$NEW_WEB_TAR" ]; then
    NEW_WEB_DIR="${WEB_DIR}.new"
    rm -rf "$NEW_WEB_DIR"
    mkdir -p "$NEW_WEB_DIR"
    tar -C "$NEW_WEB_DIR" \
        --no-same-owner \
        --no-same-permissions \
        --no-xattrs \
        --no-acls \
        -xf "$NEW_WEB_TAR"
    sync
    rm -rf "$WEB_PREV"
    if [ -d "$WEB_DIR" ]; then
        mv "$WEB_DIR" "$WEB_PREV"
    fi
    mv "$NEW_WEB_DIR" "$WEB_DIR"
    # -h: change symlinks themselves, never what they point at.
    chown -hR cast:cast "$WEB_DIR"
    sync
    log "web tree swapped: $WEB_DIR"
fi

# Optional image-tree payload (helper scripts, sudoers drop-ins, systemd
# units and drop-ins, a few exact-name config files, calibrations). A bundle
# that ships a new helper script or an updated sudoers line doesn't require
# the operator to scp anything — this step installs what is in
# image_tree.tar into a strictly-allowlisted set of paths. It only runs when
# the bundle came through the signed, root-owned staging path.
#
# Allowlist (every file is installed root:root by the `install` calls below):
#   /usr/local/sbin/air-*                          — 0755
#   /etc/sudoers.d/air-*                           — 0440, gated by `visudo -cf`
#   /etc/systemd/system/air-*.service, air-*.timer — 0644
#   /etc/systemd/system/{cast,zerotier-one}.service.d/[0-9]*-air-*.conf
#                                                  — 0644
#   /etc/tmpfiles.d/air.conf                       — 0644 + systemd-tmpfiles --create
#   /etc/logrotate.d/cast                          — 0644
#   /etc/avahi/services/air.service                — 0644 + avahi reload/restart
#   /var/lib/cast/calibrations/*_*.json            — 0644 (wave 51)
#
# Anything in the archive outside these directories/patterns is never
# visited, which shrinks the blast radius if someone's `publish-ota.sh`
# mis-stages (e.g. a stray /etc/passwd). A sudoers file that fails
# validation is skipped on its own — the existing file is kept — while the
# other payloads from the same archive are still installed.
IMAGE_TREE_TAR="${STAGING%/}/image_tree.tar"
if [ -s "$IMAGE_TREE_TAR" ] && [ -z "$ROOT_STAGE" ]; then
    log "ignoring image_tree.tar: signed root-owned staging was not available for this OTA"
fi
if [ -n "$ROOT_STAGE" ] && [ -s "$IMAGE_TREE_TAR" ]; then
    TREE_STAGE="$(ota_mktemp_dir tree)"
    # Defence-in-depth on tar extraction. Our tarballs are produced
    # by scripts/publish-ota.sh which never emits symlinks, xattrs,
    # absolute paths, or traversal strings — but a compromised
    # publisher or a corrupted bundle could. Run the same pre-scan
    # used for every other archive (validate_plain_tar) and refuse
    # the whole image tree on any suspect entry. A refusal is logged
    # by the helper and is NOT fatal: the binary/web swap above has
    # already happened and the rest of the apply continues.
    if validate_plain_tar "$IMAGE_TREE_TAR" "image_tree.tar"; then
        # --no-same-owner/--no-same-permissions — owner + mode come
        #   from the `install` calls below, never the archive.
        # --no-xattrs / --no-acls — we don't ship ACLs or xattrs;
        #   any attempt to set them is suspicious.
        tar -C "$TREE_STAGE" \
            --no-same-owner \
            --no-same-permissions \
            --no-xattrs \
            --no-acls \
            -xf "$IMAGE_TREE_TAR"
    else
        rm -rf "$TREE_STAGE"
        TREE_STAGE=""
    fi

    # Every step below is guarded on TREE_STAGE being non-empty. Without
    # the guard, a refused pre-scan (TREE_STAGE="") would turn tests
    # like `[ -d "$TREE_STAGE/usr/local/sbin" ]` into
    # `[ -d "/usr/local/sbin" ]` — a real directory — and the loops
    # would iterate over the live system, violating the "refused
    # archive does nothing" contract.

    # Helper scripts
    if [ -n "$TREE_STAGE" ] && [ -d "$TREE_STAGE/usr/local/sbin" ]; then
        for f in "$TREE_STAGE/usr/local/sbin"/air-*; do
            [ -f "$f" ] || continue
            bn="$(basename "$f")"
            case "$bn" in
                air-*)
                    install -m 0755 -o root -g root "$f" "/usr/local/sbin/$bn"
                    log "helper installed: /usr/local/sbin/$bn"
                    ;;
                *)
                    log "refusing (helper name): $bn"
                    ;;
            esac
        done
    fi

    # Run the boot helper once now so boot config changes such as
    # enable_uart=1 are written before the operator's next reboot. The
    # helper is idempotent and still runs on every boot. Skipped when
    # the archive was refused, so a refused archive triggers nothing.
    if [ -n "$TREE_STAGE" ] && [ -x /usr/local/sbin/air-ensure-boot-rw ]; then
        /usr/local/sbin/air-ensure-boot-rw 2>&1 | logger -t air-ota-apply \
            || log "air-ensure-boot-rw returned $? during OTA apply"
    fi

    # systemd units (reload daemon after any install, enable any new)
    units_changed=0
    new_units=()
    if [ -n "$TREE_STAGE" ] && [ -d "$TREE_STAGE/etc/systemd/system" ]; then
        # Match both .service AND .timer files. Timers are first-class
        # systemd units; `air-cast-liveness.timer`, `air-zerotier-
        # watchdog.timer` etc. ship via OTA and need the same install
        # discipline. Two explicit globs are used instead of brace
        # expansion.
        for f in "$TREE_STAGE/etc/systemd/system"/air-*.service \
                 "$TREE_STAGE/etc/systemd/system"/air-*.timer; do
            [ -f "$f" ] || continue
            bn="$(basename "$f")"
            case "$bn" in
                air-*.service|air-*.timer)
                    # Detect first-time install so we can `systemctl
                    # enable` it below — without this, a NEW unit
                    # shipped via OTA (e.g. `air-ensure-boot-rw.service`
                    # added in wave 22) lands on disk but never auto-
                    # starts on reboot. Existing units are left alone;
                    # re-enabling them would be a no-op but burns an
                    # fsync + a journal entry per OTA. The N100 cleanup
                    # service is intentionally timer-driven; enable only
                    # air-n100-cleanup.timer, not its oneshot service.
                    was_present=0
                    [ -e "/etc/systemd/system/$bn" ] && was_present=1
                    install -m 0644 -o root -g root "$f" "/etc/systemd/system/$bn"
                    if [ "$was_present" = 0 ] && [ "$bn" != "air-n100-cleanup.service" ]; then
                        new_units+=("$bn")
                    fi
                    log "unit installed: /etc/systemd/system/$bn"
                    units_changed=1
                    ;;
                *)
                    log "refusing (unit name): $bn"
                    ;;
            esac
        done
    fi

    # systemd drop-ins for explicitly allowlisted units. `cast.service.d`
    # carries narrow service sandbox repairs such as /run/sudo writability;
    # `zerotier-one.service.d` carries the Restart=always never-stop policy.
    # Anything that doesn't match `<unit>.service.d/*-air-*.conf` is refused.
    for unit_dropin in cast.service.d zerotier-one.service.d; do
        if [ -n "$TREE_STAGE" ] && [ -d "$TREE_STAGE/etc/systemd/system/$unit_dropin" ]; then
            install -d -o root -g root -m 0755 "/etc/systemd/system/$unit_dropin"
            for f in "$TREE_STAGE/etc/systemd/system/$unit_dropin"/*.conf; do
                [ -f "$f" ] || continue
                bn="$(basename "$f")"
                case "$bn" in
                    # Pattern: NN-air-*.conf — number prefix is systemd's
                    # ordering convention, "air-" makes the file source
                    # unambiguous, ".conf" is mandatory for systemd to
                    # pick the drop-in up.
                    [0-9]*-air-*.conf)
                        # Drop-ins tagged amd64/x86_64 in their name are
                        # only installed on a matching device.
                        case "$bn" in
                            *amd64*.conf|*x86_64*.conf)
                                case "$(device_arch)" in
                                    x86_64|amd64) ;;
                                    *)
                                        log "refusing (wrong-arch drop-in): $unit_dropin/$bn for $(device_arch)"
                                        continue
                                        ;;
                                esac
                                ;;
                        esac
                        install -m 0644 -o root -g root "$f" \
                            "/etc/systemd/system/$unit_dropin/$bn"
                        log "drop-in installed: /etc/systemd/system/$unit_dropin/$bn"
                        units_changed=1
                        ;;
                    *)
                        log "refusing (drop-in name): $unit_dropin/$bn"
                        ;;
                esac
            done
        fi
    done

    # Sudoers drop-in — validate with `visudo -c` BEFORE atomic rename
    # so a typo can't brick sudo on this device. If validation fails
    # we log and keep the old file, which is always boot-safe.
    if [ -n "$TREE_STAGE" ] && [ -d "$TREE_STAGE/etc/sudoers.d" ]; then
        for f in "$TREE_STAGE/etc/sudoers.d"/air-*; do
            [ -f "$f" ] || continue
            bn="$(basename "$f")"
            case "$bn" in
                air-*)
                    # Stage next to the final name so the `mv` is a
                    # same-directory rename; the PID suffix keeps the
                    # staged name unique to this run.
                    tmp="/etc/sudoers.d/${bn}.new-$$"
                    install -m 0440 -o root -g root "$f" "$tmp"
                    prune_n100_pi_only_sudoers "$tmp"
                    if visudo -cf "$tmp" >/dev/null 2>&1; then
                        mv -f "$tmp" "/etc/sudoers.d/$bn"
                        log "sudoers installed: /etc/sudoers.d/$bn"
                    else
                        log "refusing (sudoers syntax): /etc/sudoers.d/$bn — keeping existing"
                        rm -f "$tmp"
                    fi
                    ;;
                *)
                    log "refusing (sudoers name): $bn"
                    ;;
            esac
        done
    fi

    # Runtime config files from the shared N100 stage allowlist. These are
    # exact-name installs only; keep broader patterns out of this block.
    if [ -n "$TREE_STAGE" ] && [ -f "$TREE_STAGE/etc/tmpfiles.d/air.conf" ]; then
        install -D -m 0644 -o root -g root \
            "$TREE_STAGE/etc/tmpfiles.d/air.conf" \
            /etc/tmpfiles.d/air.conf
        log "tmpfiles config installed: /etc/tmpfiles.d/air.conf"
        systemd-tmpfiles --create /etc/tmpfiles.d/air.conf 2>/dev/null \
            || log "systemd-tmpfiles --create /etc/tmpfiles.d/air.conf returned $?"
    fi
    if [ -n "$TREE_STAGE" ] && [ -f "$TREE_STAGE/etc/logrotate.d/cast" ]; then
        install -D -m 0644 -o root -g root \
            "$TREE_STAGE/etc/logrotate.d/cast" \
            /etc/logrotate.d/cast
        log "logrotate config installed: /etc/logrotate.d/cast"
    fi
    if [ -n "$TREE_STAGE" ] && [ -f "$TREE_STAGE/etc/avahi/services/air.service" ]; then
        install -D -m 0644 -o root -g root \
            "$TREE_STAGE/etc/avahi/services/air.service" \
            /etc/avahi/services/air.service
        log "Avahi service installed: /etc/avahi/services/air.service"
        systemctl try-reload-or-restart avahi-daemon.service 2>/dev/null \
            || log "avahi-daemon reload/restart returned $?"
    fi

    # Camera calibration JSONs — installed to
    # /var/lib/cast/calibrations/, allowlist is `*_*.json` (must
    # contain an underscore somewhere in the basename, reject
    # anything else). The startup loader maps `camera_type` +
    # libcamera-detected sensor name to the correct file name here,
    # so shipping calibrations via OTA means an operator who swaps
    # sensors (e.g. HQ → Module 3 Wide) gets the matching
    # intrinsics on the next OTA + reboot, without having to manually
    # scp a file. Added in wave 51.
    if [ -n "$TREE_STAGE" ] && [ -d "$TREE_STAGE/var/lib/cast/calibrations" ]; then
        mkdir -p /var/lib/cast/calibrations
        for f in "$TREE_STAGE/var/lib/cast/calibrations"/*.json; do
            [ -f "$f" ] || continue
            bn="$(basename "$f")"
            # Require a non-trivial name with at least one underscore
            # (matches `imx708_wide_1280x720.json`, `ov5647_720p.json`,
            # etc.). Rejects things like `.json` or `etc` that got
            # misnamed.
            case "$bn" in
                *_*.json)
                    install -m 0644 -o root -g root "$f" \
                        "/var/lib/cast/calibrations/$bn"
                    log "calibration installed: /var/lib/cast/calibrations/$bn"
                    ;;
                *)
                    log "refusing (calibration name): $bn"
                    ;;
            esac
        done
    fi

    if [ "$units_changed" = 1 ]; then
        systemctl daemon-reload || log "daemon-reload returned $?"
    fi

    # Enable freshly-installed units so they actually start on the
    # next boot. `systemctl enable` is a no-op for units that are
    # already enabled, but for a new unit shipped via OTA it's the
    # difference between "symlink created, starts on reboot" and
    # "file on disk, inert forever". Failure is logged but does not
    # fail the apply — a bad [Install] section in a unit file would
    # otherwise brick every OTA, and the binary/web swap above is
    # the critical path.
    #
    # Guard the array expansion under `set -u`: with an empty
    # new_units= array, bash < 4.4 treats "${new_units[@]}" as
    # unbound-variable access and errors. Pi OS Bookworm bash 5.2
    # is safe but the explicit length check keeps this robust if
    # the script ever ends up on an older image.
    if [ "${#new_units[@]}" -gt 0 ]; then
        for bn in "${new_units[@]}"; do
            if systemctl enable "$bn" 2>/dev/null; then
                log "unit enabled: $bn"
            else
                log "unit enable failed: $bn (will not run on reboot)"
            fi
        done
        # Second daemon-reload after enable — the `enable` step
        # creates Wants/Required-By symlinks that systemd picks up
        # on the next daemon-reload. Without this, a manual
        # `systemctl start <unit>` between apply + reboot would
        # say "no such unit" even though the file + symlinks are on
        # disk.
        systemctl daemon-reload || log "post-enable daemon-reload returned $?"

        # Start newly-installed TIMERS immediately so the recovery
        # primitive they drive (e.g. air-zerotier-watchdog.timer →
        # 60 s ZT health check) is active right now — not "on next
        # reboot, several hours later". Services are NOT auto-started
        # because they typically depend on cast.service or other
        # state that's about to be restarted anyway by the OTA-
        # finishing systemctl restart cast.service. Only timers get
        # the immediate-start treatment.
        #
        # Exception: air-gst-runtime-repair.timer may do slow apt work
        # on older Pi images. Starting it immediately after OTA used to
        # create a confusing second repair run a few minutes after the
        # normal OTA service restart. It is enabled for future boots,
        # but deliberately not kicked during the apply transaction.
        for bn in "${new_units[@]}"; do
            case "$bn" in
                air-gst-runtime-repair.timer)
                    log "timer start deferred after OTA: $bn"
                    ;;
                *.timer)
                    if systemctl start "$bn" 2>/dev/null; then
                        log "timer started: $bn"
                    else
                        log "timer start failed: $bn (will start on next reboot)"
                    fi
                    ;;
            esac
        done
    fi
fi

# x86_64-only housekeeping helper; a no-op on other architectures.
run_n100_cleanup_if_available

# Record the new version. It is the basename of the staging directory named
# in the marker (captured as SIGNED_VERSION before STAGING could be
# re-pointed at the root-owned temp copy).
VERSION="$SIGNED_VERSION"
mkdir -p "${VERSION_FILE%/*}"
printf '%s\n' "$VERSION" >"$VERSION_FILE"
chmod 0644 "$VERSION_FILE"
log "version recorded: $VERSION"

# Record the source commit when the bundle carries one.
SOURCE_COMMIT_STAGED="${STAGING%/}/source_commit.txt"
if [[ -s "$SOURCE_COMMIT_STAGED" ]]; then
    install -m 0644 -o root -g root "$SOURCE_COMMIT_STAGED" "$SOURCE_COMMIT_FILE"
    log "source commit recorded: $SOURCE_COMMIT_FILE"
fi

# Last step before consuming the marker: migrate writable state to the
# `cast` service account. If anything above failed, ownership stays as-is
# so the old process can keep serving until the operator retries or rolls
# back.
finalize_cast_ownership

# The marker is consumed last — any earlier bail leaves it in place, so a
# human operator can re-trigger the apply by poking systemctl.
rm -f "$MARKER"
log "done; caller should now sudo /bin/systemctl --no-block restart cast.service"