#!/bin/bash
# vexor-setup - first-run wizard for Vexor monitoring platform
# Idempotent: safe to re-run.
# Strict mode: stop on command failure, on use of an unset variable, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# NONINTERACTIVE=1 disables every prompt. Only the first argument is inspected.
NONINTERACTIVE=0
case "${1:-}" in
  --non-interactive) NONINTERACTIVE=1 ;;
esac

# ANSI colour escapes; they are expanded by `echo -e` in the helpers below.
GREEN='\e[32m'
YELLOW='\e[33m'
RED='\e[31m'
BOLD='\e[1m'
RST='\e[0m'

# _vx_line COLOUR TAG [MESSAGE...] - shared formatter: coloured tag, reset,
# a space, then the message words joined by spaces. Backslash escapes in the
# message are interpreted (echo -e).
_vx_line() {
  local colour="$1" tag="$2"
  shift 2
  echo -e "${colour}${tag}${RST} $*"
}

# say MESSAGE...  - progress line on stdout.
say() { _vx_line "$GREEN" '[vexor-setup]' "$@"; }

# warn MESSAGE... - warning on stdout; the script keeps running.
warn() { _vx_line "$YELLOW" '[warn]' "$@"; }

# die MESSAGE...  - error on stderr, then terminate with exit status 1.
die() {
  _vx_line "$RED" '[error]' "$@" >&2
  exit 1
}
# ask PROMPT VAR [DEFAULT] [SECRET]
#   Store an answer in the global variable named VAR.
#   PROMPT   text shown to the operator (": " is appended).
#   VAR      name of the variable to assign; must not be one of this
#            function's own locals (prompt, var, default, secret, val).
#   DEFAULT  value used when the operator just presses Enter, and used
#            unconditionally when NONINTERACTIVE=1. May be empty.
#   SECRET   "1" reads without echo and never displays DEFAULT.
# Reads the global NONINTERACTIVE. `read` fails on EOF, which aborts the
# script under `set -e`.
ask()  {
  local prompt="$1" var="$2" default="${3:-}" secret="${4:-0}"
  if [ "$NONINTERACTIVE" -eq 1 ]; then
    printf -v "$var" '%s' "$default"
    return
  fi
  local val
  if [ "$secret" = "1" ]; then
    # The default is deliberately not shown for secrets, but it must still
    # apply: callers pass a generated password as DEFAULT and expect Enter
    # to select it rather than produce an empty password.
    read -rsp "$prompt: " val
    echo
  elif [ -n "$default" ]; then
    read -rp "$prompt [$default]: " val
  else
    read -rp "$prompt: " val
  fi
  printf -v "$var" '%s' "${val:-$default}"
}
# genpw [LENGTH]
#   Print LENGTH (default 24) random characters from [A-Za-z0-9], followed by
#   a newline, on stdout.
#
#   `head` closes the pipe once it has enough bytes, which kills `tr` with
#   SIGPIPE; with pipefail on, that would make the pipeline "fail". pipefail
#   is therefore switched off, but only inside a subshell, so the caller's
#   own pipefail setting (on or off) is left exactly as it was.
genpw() {
  (
    set +o pipefail
    LC_ALL=C tr -dc 'A-Za-z0-9' </dev/urandom 2>/dev/null | head -c "${1:-24}"
    echo
  )
}

# Everything below manages system users, packages and services: refuse to
# continue unless the effective uid is 0.
if ! [ "$(id -u)" -eq 0 ]; then
  die "Must run as root"
fi
# bracket_host HOST
#   Print HOST in the form needed inside a URL authority: a bare IPv6 literal
#   (anything containing ':') is wrapped in square brackets, so that
#   https://[::1]/... is valid; everything else is printed unchanged.
#   A value that already starts with '[' is passed through untouched, which
#   makes the helper safe to apply more than once.
bracket_host() {
  case "$1" in
    \[*) printf '%s\n' "$1" ;;
    *:*) printf '[%s]\n' "$1" ;;
    *)   printf '%s\n' "$1" ;;
  esac
}

# Banner (bold), then a blank line.
echo -e "${BOLD}==================================================
  Vexor Monitoring Platform - First Run Setup
==================================================${RST}"
echo

# Configuration and log directories used by every later section.
mkdir -p /etc/vexor /var/log/vexor

# -------- 0. Service account --------
# Unprivileged system user 'vexor' (the account vexor-api and the
# perfdata-collector run as). Skipped when the account already exists.
if ! id vexor >/dev/null 2>&1; then
  useradd -r -d /var/lib/vexor -s /sbin/nologin vexor
fi
mkdir -p /var/lib/vexor/branding /var/lib/vexor/reports /var/lib/vexor/backups
chown -R vexor:vexor /var/lib/vexor /var/log/vexor

# Locations vexor-api writes to: generated Naemon host configs and the
# external command file.
mkdir -p /etc/naemon/vexor /var/lib/naemon/rw

# Add vexor to the naemon group before any directory is handed to
# vexor:naemon. Best effort: a usermod failure is ignored.
if getent group naemon >/dev/null 2>&1 && getent passwd vexor >/dev/null 2>&1; then
  usermod -a -G naemon vexor 2>/dev/null || true
fi

# Generated config tree: vexor-api writes it, naemon reads it. Each directory
# becomes vexor:naemon, mode 2775 (setgid so new files inherit the group,
# group-writable). Ownership is only touched when both the user and the
# group exist.
for cfg_dir in \
  /etc/naemon/vexor \
  /etc/naemon/vexor/hosts \
  /etc/naemon/vexor/services \
  /etc/naemon/vexor/commands \
  /etc/naemon/vexor/templates; do
  if [ ! -d "$cfg_dir" ]; then
    mkdir -p "$cfg_dir"
  fi
  getent passwd vexor >/dev/null 2>&1 || continue
  getent group naemon >/dev/null 2>&1 || continue
  chown vexor:naemon "$cfg_dir" || true
  chmod 2775 "$cfg_dir" || true
done

# Plugin secret store: per-credential password / key files referenced by
# Naemon check commands. vexor owns it (writer); group naemon lets the
# plugins read.
mkdir -p /etc/naemon/keys
chown vexor:naemon /etc/naemon/keys 2>/dev/null || true
chmod 750 /etc/naemon/keys

# Group membership is also what lets vexor read the livestatus unix socket
# (/var/lib/naemon/livestatus.sock, mode 0660 naemon:naemon).
if getent group naemon >/dev/null 2>&1; then
  usermod -a -G naemon vexor 2>/dev/null || true
fi

# vexor-api runs `naemon -v` (which writes to the log, cache and spool) and
# `systemctl reload naemon` as the unprivileged vexor user. Here the
# file-system side is granted through the naemon group: each directory that
# exists is recursively given group naemon and group-write. The log directory
# is created first, so it is always processed; cache and spool are skipped
# when absent. All chgrp/chmod failures are ignored.
mkdir -p /var/log/naemon
for shared_dir in /var/log/naemon /var/cache/naemon /var/lib/naemon/spool; do
  if [ -d "$shared_dir" ]; then
    chgrp -R naemon "$shared_dir" 2>/dev/null || true
    chmod -R g+w "$shared_dir" 2>/dev/null || true
  fi
done

# polkit: install only when the RPM is not present; failure is tolerated.
rpm -q polkit >/dev/null 2>&1 || dnf install -y polkit >/dev/null 2>&1 || true

# Standard Nagios plugins (check_ping, check_tcp, check_http, ...) referenced
# by vexor-commands.cfg. check_ping is the probe: when it is not executable,
# try the meta package first and fall back to the individual plugin packages.
# Neither install is allowed to abort the setup.
if ! [ -x /usr/lib64/nagios/plugins/check_ping ]; then
  fallback_plugins=(
    nagios-plugins-ping nagios-plugins-tcp nagios-plugins-http
    nagios-plugins-ssh nagios-plugins-by_ssh nagios-plugins-ftp nagios-plugins-disk
    nagios-plugins-load nagios-plugins-procs
  )
  if ! dnf install -y nagios-plugins-all >/dev/null 2>&1; then
    dnf install -y "${fallback_plugins[@]}" >/dev/null 2>&1 || true
  fi
fi
# Agentless SSH dispatcher: vexor-api references
# /opt/vexor/plugins/check_by_ssh. It is a symlink to the package-provided
# binary, created only when that binary is executable and nothing exists at
# the link path yet.
pkg_by_ssh=/usr/lib64/nagios/plugins/check_by_ssh
vx_by_ssh=/opt/vexor/plugins/check_by_ssh
mkdir -p /opt/vexor/plugins
if [ -x "$pkg_by_ssh" ]; then
  if [ ! -e "$vx_by_ssh" ]; then
    ln -sf "$pkg_by_ssh" "$vx_by_ssh"
  fi
fi

# Payload directory used by /opt/vexor/plugins/vexor_ssh_check: vexor-api
# writes one file per unique remote command, naemon reads them at check time.
payload_dir=/etc/naemon/vexor/payloads
mkdir -p "$payload_dir"
if getent passwd vexor >/dev/null && getent group naemon >/dev/null; then
  chown vexor:naemon "$payload_dir"
  chmod 0775 "$payload_dir"
fi

# Perfdata spool: naemon writes here; vexor-perfdata-collector ingests and
# cleans up. Ownership is set only when user naemon and group vexor exist.
perfdata_spool=/var/spool/vexor-perfdata
mkdir -p "$perfdata_spool"
if getent passwd naemon >/dev/null && getent group vexor >/dev/null; then
  chown naemon:vexor "$perfdata_spool"
  chmod 2775 "$perfdata_spool"
fi
# The WMI agentless plugin needs impacket (not packaged for EL10). When the
# module cannot be imported it is installed system-wide with pip3; a missing
# pip3 or a failed install only produces a warning.
if python3 -c "import impacket" 2>/dev/null; then
  : # already importable, nothing to do
elif ! command -v pip3 >/dev/null 2>&1; then
  warn "impacket missing and pip3 (python3-pip) not installed -> WMI/agentless Windows checks disabled. Install python3-pip, then: pip3 install impacket"
elif pip3 install --quiet impacket >/dev/null 2>&1; then
  say "Installed impacket (agentless WMI/Windows checks enabled)"
else
  warn "impacket install failed (PyPI unreachable or pip error) -> WMI/agentless Windows checks disabled until 'pip3 install impacket' succeeds"
fi

# The synthetic-check runner needs jsonpath-ng inside the API venv. Older
# venvs (pre-business-checks) lack it, so install on demand; failure ignored.
api_python=/opt/vexor/api/venv/bin/python
if [ -x "$api_python" ] && ! "$api_python" -c "import jsonpath_ng" 2>/dev/null; then
  /opt/vexor/api/venv/bin/pip install --quiet jsonpath-ng >/dev/null 2>&1 || true
fi
# polkit rule: user 'vexor' may reload/start/stop/restart naemon.service (and
# nothing else) through systemd's manage-units action. The file is rewritten
# on every run.
polkit_rule=/etc/polkit-1/rules.d/90-vexor-naemon.rules
mkdir -p "${polkit_rule%/*}"
cat > "$polkit_rule" <<'POLKIT'
polkit.addRule(function(action, subject) {
    if (action.id == "org.freedesktop.systemd1.manage-units" &&
        subject.user == "vexor") {
        var unit = action.lookup("unit");
        if (unit == "naemon.service") {
            var verb = action.lookup("verb");
            if (verb == "reload" || verb == "start" ||
                verb == "stop"   || verb == "restart") {
                return polkit.Result.YES;
            }
        }
    }
});
POLKIT
# Restart polkit only when it is currently running; a failed restart is ignored.
if systemctl is-active polkit >/dev/null 2>&1; then
  systemctl restart polkit 2>/dev/null || true
fi

# -------- 1. License --------
# Informational only: report the first "license_id" found in the license
# file (searched as text even if the file is binary), or warn when the file
# is absent. A file without a license_id still prints "License present: ".
license_file=/etc/vexor/license.lic
if [ ! -f "$license_file" ]; then
  warn "No license at /etc/vexor/license.lic - the bundled license should have been installed."
else
  license_id=$(grep -ao '"license_id":"[^"]*"' "$license_file" | head -1) || true
  say "License present: $license_id"
fi

# -------- 2. cred_key (used by API to encrypt host credentials) --------
# 32 random bytes, urlsafe-base64 encoded, stored root:vexor 0640.
# An existing non-empty key is never replaced. An empty file (left behind
# when an earlier run failed part-way) is treated as missing.
cred_key_file=/etc/vexor/cred_key
if [ ! -s "$cred_key_file" ]; then
  say "Generating credential encryption key"
  # Write to a temp file under a restrictive umask and move it into place
  # only on success: the key is never world-readable, and a python3 failure
  # (which aborts the script under `set -e`) cannot leave an empty key behind.
  (
    umask 077
    python3 -c "import secrets,base64; print(base64.urlsafe_b64encode(secrets.token_bytes(32)).decode())" \
      > "$cred_key_file.tmp"
  )
  [ -s "$cred_key_file.tmp" ] || die "Failed to generate credential encryption key"
  mv -f "$cred_key_file.tmp" "$cred_key_file"
  chmod 640 "$cred_key_file"
  chown root:vexor "$cred_key_file" 2>/dev/null || :
fi

# -------- 3. notify-token --------
# 48-character alphanumeric token, stored root:vexor 0640. An existing
# non-empty token is kept.
if [ ! -s /etc/vexor/notify-token ]; then
  say "Generating notification token"
  notify_token=$(genpw 48)
  # genpw prints only a newline if the random source is unavailable; refuse
  # to persist a short or empty token.
  [ "${#notify_token}" -eq 48 ] || die "Failed to generate notification token"
  # Created under umask 077 so the token is never world-readable, not even
  # for the instant before chmod runs.
  (
    umask 077
    printf '%s\n' "$notify_token" > /etc/vexor/notify-token
  )
  chmod 640 /etc/vexor/notify-token
  chown root:vexor /etc/vexor/notify-token 2>/dev/null || :
fi

# -------- 4. TLS cert (self-signed, 10 years, with SAN for hostname + all IPs) --------
# (Re)issue the certificate when it is missing, cannot be parsed, or no longer
# matches this machine: the CN must equal the current hostname and every
# current IPv4 address must be present as an "IP Address" SAN entry. This
# keeps https://<new-host>/ free of name-mismatch warnings after a
# hostname/IP change.
TLS_CRT=/etc/vexor/tls/vexor.crt
TLS_KEY=/etc/vexor/tls/vexor.key
mkdir -p /etc/vexor/tls

# The lookups below end in `|| true` on purpose. Under `set -euo pipefail` a
# failing `hostname -f` (FQDN not resolvable), a host without any IPv4
# address (grep -v prints nothing and exits 1), an unreadable certificate or
# a certificate without SAN would otherwise abort the whole script before
# the fallback / regeneration logic gets a chance to run.
current_host=$(hostname -f 2>/dev/null || true)
[ -n "$current_host" ] || current_host=$(hostname)
# IPv4 addresses only (entries containing ':' are dropped), one per line.
current_ips=$(hostname -I 2>/dev/null | tr ' ' '\n' | grep -v ':' | grep -v '^$' | sort -u || true)

need_regen=1
if [ -f "$TLS_CRT" ]; then
  cert_cn=$(openssl x509 -in "$TLS_CRT" -noout -subject 2>/dev/null \
    | sed -n 's/.*CN *= *\([^,/]*\).*/\1/p' | xargs || true)
  # One "DNS:..." / "IP Address:..." entry per line.
  cert_san=$(openssl x509 -in "$TLS_CRT" -noout -ext subjectAltName 2>/dev/null \
    | grep -oE '(DNS|IP Address):[^,]*' || true)
  if [ "$cert_cn" = "$current_host" ]; then
    miss=0
    for ip in $current_ips; do
      # Whole-line, fixed-string comparison: a regex/substring match would
      # accept 10.0.0.1 because the certificate lists 10.0.0.10, and would
      # treat the dots as wildcards.
      grep -qxF "IP Address:$ip" <<<"$cert_san" || miss=1
    done
    if [ "$miss" -eq 0 ]; then
      need_regen=0
    fi
  fi
fi

if [ "$need_regen" -eq 1 ]; then
  say "Generating self-signed TLS certificate (10 years, CN=$current_host, SAN includes all local IPs)"
  san_lines="DNS:$current_host,DNS:localhost"
  for ip in $current_ips 127.0.0.1; do
    san_lines="$san_lines,IP:$ip"
  done
  # openssl's own output is discarded, so report the failure explicitly
  # instead of letting `set -e` end the script without a message.
  openssl req -x509 -newkey rsa:2048 -nodes -days 3650 \
    -subj "/CN=$current_host/O=Vexor" \
    -addext "subjectAltName=$san_lines" \
    -addext "keyUsage=digitalSignature,keyEncipherment" \
    -addext "extendedKeyUsage=serverAuth" \
    -keyout "$TLS_KEY" \
    -out    "$TLS_CRT" 2>/dev/null \
    || die "openssl failed to generate the TLS certificate ($TLS_CRT)"
  chmod 600 "$TLS_KEY"
  # Reload nginx if it's already running so the new cert takes effect
  systemctl reload nginx 2>/dev/null || true
fi

# -------- 5. MariaDB --------
# Start MariaDB and, on first run only, create the 'vexor' database and
# account with a generated password, then materialize the API's .env and
# write /etc/vexor/db.env. The existence of db.env marks this section as
# done, so it is written LAST: if any earlier step fails, the next run
# repeats the whole section instead of skipping a half-finished setup.
say "Starting MariaDB"
systemctl enable --now mariadb >/dev/null
sleep 1

if [ ! -f /etc/vexor/db.env ]; then
  say "Configuring MariaDB"
  ask "MariaDB root password (leave empty if unset)" MYSQL_ROOT_PW "" 1
  # Build the client command as an array so a root password containing
  # spaces or glob characters reaches mysql as a single argument.
  mysql_root=(mysql -uroot)
  if [ -n "$MYSQL_ROOT_PW" ]; then
    mysql_root+=("-p$MYSQL_ROOT_PW")
  fi

  VEXOR_DB_PW=$(genpw 24)
  [ -n "$VEXOR_DB_PW" ] || die "Failed to generate MariaDB password for 'vexor'"
  # CREATE ... IF NOT EXISTS followed by ALTER USER makes the statements safe
  # to repeat: an existing account simply gets the new password.
  "${mysql_root[@]}" <<SQL
CREATE DATABASE IF NOT EXISTS vexor CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
CREATE USER IF NOT EXISTS 'vexor'@'localhost' IDENTIFIED BY '${VEXOR_DB_PW}';
ALTER USER 'vexor'@'localhost' IDENTIFIED BY '${VEXOR_DB_PW}';
GRANT ALL PRIVILEGES ON vexor.* TO 'vexor'@'localhost';
FLUSH PRIVILEGES;
SQL

  # Materialize .env from template. Both secret files are created under
  # umask 077 so they are never world-readable before chmod/chown run.
  (
    umask 077
    sed -e "s|__DB_PASSWORD__|${VEXOR_DB_PW}|g" \
        -e "s|__SECRET_KEY__|$(genpw 48)|g" \
        /opt/vexor/api/.env.template > /opt/vexor/api/.env
  )
  chown vexor:vexor /opt/vexor/api/.env 2>/dev/null || :
  chmod 600 /opt/vexor/api/.env

  (
    umask 077
    cat > /etc/vexor/db.env <<EOF
VEXOR_DB_PASSWORD=${VEXOR_DB_PW}
VEXOR_DB_URL=mysql+pymysql://vexor:${VEXOR_DB_PW}@127.0.0.1/vexor
EOF
  )
  chmod 640 /etc/vexor/db.env
  chown root:vexor /etc/vexor/db.env 2>/dev/null || :
fi

# Permission repair for the secret env-files read by the API (user 'vexor')
# and by naemon self-checks: each one that exists is reset to root:vexor
# 0640. Older installs created them 0600 root:root, which broke the MariaDB
# check (db.env), credential encryption / 'create failed' 400 (cred_key) and
# notifications (notify-token). Failures are ignored.
for secret_file in /etc/vexor/db.env /etc/vexor/cred_key /etc/vexor/notify-token; do
  [ -f "$secret_file" ] || continue
  chown root:vexor "$secret_file" 2>/dev/null || :
  chmod 640 "$secret_file" 2>/dev/null || :
done

# Backup directory consumed by backup_router: vexor:vexor 0750. If `install`
# cannot create it with that ownership, fall back to a plain mkdir and retry
# the chown on a best-effort basis.
if ! install -d -o vexor -g vexor -m 0750 /var/backups 2>/dev/null; then
  mkdir -p /var/backups
fi
chown vexor:vexor /var/backups 2>/dev/null || :

# -------- 6. InfluxDB --------
# Start InfluxDB, run the one-time `influx setup` (org=vexor,
# bucket=perfdata) and persist the resulting API token in
# /etc/vexor/influx.env. The existence of that file marks the section done.
say "Starting InfluxDB"
# Bind InfluxDB to localhost only (defense-in-depth; all Vexor clients use 127.0.0.1)
if [ -f /etc/influxdb/config.toml ]; then
  grep -q "^http-bind-address" /etc/influxdb/config.toml || \
    echo 'http-bind-address = "127.0.0.1:8086"' >> /etc/influxdb/config.toml
fi
systemctl enable --now influxdb >/dev/null
# Wait (up to ~30s) for InfluxDB to listen; say so if it never does, so a
# later setup failure is not a mystery.
influx_up=0
for i in $(seq 1 30); do
  if curl -sf http://localhost:8086/health >/dev/null; then
    influx_up=1
    break
  fi
  sleep 1
done
[ "$influx_up" -eq 1 ] || warn "InfluxDB did not report healthy within 30s - continuing anyway"

if [ ! -f /etc/vexor/influx.env ]; then
  say "Configuring InfluxDB (org=vexor, bucket=perfdata)"
  if [ $NONINTERACTIVE -eq 1 ]; then
    INFLUX_ADMIN_PW=$(genpw 20)
  else
    ask "InfluxDB admin password (min 8 chars)" INFLUX_ADMIN_PW "$(genpw 20)" 1
    # Pressing Enter must select a generated password, never an empty one.
    [ -n "$INFLUX_ADMIN_PW" ] || INFLUX_ADMIN_PW=$(genpw 20)
  fi
  # A non-zero exit is tolerated: on a re-run the instance is usually
  # already initialized and the token is recovered from the CLI config below.
  SETUP_OUT=$(influx setup --force --skip-verify \
    --username vexor-admin --password "$INFLUX_ADMIN_PW" \
    --org vexor --bucket perfdata --retention 0 \
    --json 2>&1) || warn "InfluxDB setup returned non-zero (may already be initialized)"
  TOKEN=""
  # Influx CLI writes token to ~/.influxdbv2/configs (TOML); take the first
  # non-commented `token = "..."` line.
  if [ -f "$HOME/.influxdbv2/configs" ]; then
    TOKEN=$(awk -F'"' '/^[[:space:]]*token[[:space:]]*=/{print $2; exit}' "$HOME/.influxdbv2/configs")
  fi
  if [ -z "$TOKEN" ]; then
    # Try to extract from setup JSON output as fallback
    TOKEN=$(printf '%s' "$SETUP_OUT" | python3 -c "import json,sys
try:
  d=json.load(sys.stdin)
  print(d.get('auth',{}).get('token') or d.get('token') or '')
except Exception: pass" 2>/dev/null || true)
  fi
  [ -n "$TOKEN" ] || die "Failed to obtain InfluxDB token"
  # Created under umask 077 so the token is never world-readable.
  (
    umask 077
    echo "INFLUX_TOKEN=$TOKEN" > /etc/vexor/influx.env
  )
  chmod 640 /etc/vexor/influx.env
  # Allow Naemon plugins (check_vexor_baseline) to source this file.
  chgrp naemon /etc/vexor/influx.env 2>/dev/null || true
  if [ $NONINTERACTIVE -eq 0 ]; then
    echo "  InfluxDB admin: vexor-admin / $INFLUX_ADMIN_PW (save this!)"
  fi
fi

# -------- 7. Naemon user + dirs --------
# Make sure the naemon account and its runtime directories exist and belong
# to naemon:naemon. Every step is best effort.
id naemon >/dev/null 2>&1 || useradd -r -d /var/lib/naemon -s /sbin/nologin naemon
systemd-tmpfiles --create /usr/lib/tmpfiles.d/naemon.conf 2>/dev/null || true
chown -R naemon:naemon /var/lib/naemon /var/log/naemon /var/cache/naemon /var/spool/naemon 2>/dev/null || true
# /etc/naemon is handed to naemon as well, EXCEPT the two subtrees that
# section 0 deliberately gave to vexor:naemon (the generated config tree and
# the plugin secret store). A blanket `chown -R` over /etc/naemon would undo
# that on every run and leave /etc/naemon/keys (mode 0750) unwritable by
# vexor. `chown -h` mirrors what `chown -R` does with symlinks.
find /etc/naemon \( -path /etc/naemon/vexor -o -path /etc/naemon/keys \) -prune \
  -o -exec chown -h naemon:naemon {} + 2>/dev/null || true

# -------- 8. Keycloak --------
# First run only: choose the Keycloak bootstrap admin password, store it in
# /etc/vexor/keycloak.env (root-only, 0600) and prepare the keycloak system
# user and its data/conf directories.
if [ ! -f /etc/vexor/keycloak.env ]; then
  say "Configuring Keycloak admin"
  if [ $NONINTERACTIVE -eq 1 ]; then
    KC_PW=$(genpw 20)
  else
    ask "Keycloak admin password" KC_PW "$(genpw 20)" 1
    # Pressing Enter must select a generated password, never an empty one.
    [ -n "$KC_PW" ] || KC_PW=$(genpw 20)
  fi
  # The file is sourced by this script later on, so the password is written
  # shell-quoted (%q): a value containing spaces, '$', quotes or ';' would
  # otherwise be mis-parsed or executed when sourced. Purely alphanumeric
  # passwords (everything genpw produces) are written unchanged.
  # NOTE(review): if another consumer parses this file with non-shell rules,
  # confirm it accepts backslash-escaped values.
  # Created under umask 077 so it is never world-readable.
  (
    umask 077
    printf 'KC_BOOTSTRAP_ADMIN_USERNAME=admin\nKC_BOOTSTRAP_ADMIN_PASSWORD=%q\n' \
      "$KC_PW" > /etc/vexor/keycloak.env
  )
  chmod 600 /etc/vexor/keycloak.env
  id keycloak >/dev/null 2>&1 || useradd -r -d /opt/keycloak -s /sbin/nologin keycloak
  mkdir -p /opt/keycloak/data /opt/keycloak/conf
  chown -R keycloak:keycloak /opt/keycloak/data /opt/keycloak/conf
  if [ $NONINTERACTIVE -eq 0 ]; then
    echo "  Keycloak admin 'admin' configured. Credentials saved to /etc/vexor/keycloak.env (root-only)."
  fi
fi

# -------- 9. Initial Vexor admin user --------
# First run only (guarded by the /etc/vexor/.admin_seeded marker): collect
# the initial admin's name, e-mail and password and persist them to
# /etc/vexor/.initial-admin (root-only, 0600). The account itself is created
# from these values by the Keycloak realm bootstrap further down.
if [ ! -f /etc/vexor/.admin_seeded ]; then
  say "Creating initial Vexor admin user"
  if [ $NONINTERACTIVE -eq 1 ]; then
    VX_USER="admin"
    VX_EMAIL="admin@localhost"
    VX_PW=$(genpw 16)
  else
    ask "Vexor admin username" VX_USER "admin"
    ask "Vexor admin email"    VX_EMAIL "admin@localhost"
    ask "Vexor admin password" VX_PW "$(genpw 16)" 1
    # Pressing Enter must select a generated password, never an empty one.
    [ -n "$VX_PW" ] || VX_PW=$(genpw 16)
  fi
  # The file is sourced by this script later on, so every value is written
  # shell-quoted (%q): operator-typed text with spaces, '$', quotes or ';'
  # would otherwise be mis-parsed or executed when sourced. Plain
  # alphanumeric values are written unchanged.
  # Created under umask 077 so it is never world-readable.
  (
    umask 077
    printf 'VEXOR_INITIAL_ADMIN_USER=%q\nVEXOR_INITIAL_ADMIN_EMAIL=%q\nVEXOR_INITIAL_ADMIN_PASSWORD=%q\n' \
      "$VX_USER" "$VX_EMAIL" "$VX_PW" > /etc/vexor/.initial-admin
  )
  chmod 600 /etc/vexor/.initial-admin
  touch /etc/vexor/.admin_seeded
  # NB: the password is not printed here. It is persisted to a root-only
  # file; only the file path is surfaced at this point.
  if [ $NONINTERACTIVE -eq 0 ]; then
    echo "  Vexor admin '$VX_USER' configured. Credentials saved to /etc/vexor/.initial-admin (root-only)."
  fi
fi

# -------- 10. Enable & start everything --------
# The self-monitor check plugins run as naemon and must read the credential
# env-files /etc/vexor/db.env (root:vexor) and /etc/vexor/keycloak.env
# (root:keycloak) for the MariaDB/Postgres checks. naemon therefore joins
# each of those groups that exists BEFORE it is (re)started, so the new
# process inherits the memberships. usermod failures are ignored.
for extra_group in vexor keycloak; do
  if getent group "$extra_group" >/dev/null 2>&1; then
    usermod -a -G "$extra_group" naemon 2>/dev/null || true
  fi
done

say "Enabling services"
systemctl daemon-reload
systemctl enable --now naemon || warn "naemon failed to start - check 'journalctl -u naemon'"

# naemon runs with umask 0022 and so creates /var/log/naemon/naemon.log with
# mode 0644, but vexor-api (a naemon group member) runs `naemon -v` during
# config activation and has to WRITE that log. Make the directory setgid +
# group-writable, wait up to five seconds for the log to appear, then make
# the log itself group-writable. logrotate uses copytruncate, so the
# permissions survive rotation.
naemon_log=/var/log/naemon/naemon.log
chmod 2775 /var/log/naemon 2>/dev/null || true
log_wait=0
while [ "$log_wait" -lt 5 ] && [ ! -e "$naemon_log" ]; do
  sleep 1
  log_wait=$((log_wait + 1))
done
if [ -e "$naemon_log" ]; then
  chgrp naemon "$naemon_log" 2>/dev/null || true
  chmod 0664 "$naemon_log" 2>/dev/null || true
fi
# -------- 10a. Bootstrap Postgres for Keycloak (MUST precede keycloak start) --------
# Keycloak fails fast with "password authentication failed for user keycloak"
# and burns through its systemd StartLimitBurst if it is started before the
# postgres role/DB exist. Create them first so keycloak connects on first try.
# A failure of this helper aborts the setup (set -e).
if [ -x /usr/libexec/vexor/setup-postgres ]; then
  /usr/libexec/vexor/setup-postgres
fi

# A unit that fails to start only produces a warning: the setup carries on
# and the post-setup health check reports what is down.
systemctl enable --now keycloak || warn "keycloak failed to start"
systemctl enable --now vexor-api || warn "vexor-api failed to start"
# Log pipeline (self-monitor checks these): VictoriaLogs (storage) first,
# then Vector (shipper), then the log-alerts evaluator.
mkdir -p /var/lib/vector 2>/dev/null || :  # vector data_dir (vector.toml)
systemctl enable --now vexor-victorialogs 2>/dev/null || warn "vexor-victorialogs failed to start"
systemctl enable --now vexor-vector 2>/dev/null || warn "vexor-vector failed to start"
systemctl enable --now vexor-log-alerts-evaluator 2>/dev/null || warn "vexor-log-alerts-evaluator failed to start"
systemctl enable --now vexor-perfdata-collector.timer 2>/dev/null || true
systemctl enable --now vexor-nightly.timer 2>/dev/null || true
# Automatic backups (MariaDB dump, Keycloak dump, full self-backup). Their vendor
# systemd preset is 'disabled' and no preset file ships, so a fresh install would
# leave them off until manually enabled. Enable them here so scheduled backups run
# out of the box (matches what backup_verify in the nightly cycle expects to find).
systemctl enable --now vexor-db-backup.timer 2>/dev/null || true
systemctl enable --now vexor-keycloak-backup.timer 2>/dev/null || true
systemctl enable --now vexor-selfbackup.timer 2>/dev/null || true
# Allow nginx to proxy upstream (Keycloak 8180, vexor-api 8080) when SELinux is enforcing
if command -v setsebool >/dev/null 2>&1; then
  setsebool -P httpd_can_network_connect 1 2>/dev/null || true
fi
# Same treatment as the other units: without the `|| warn`, an nginx start
# failure would abort the script here under `set -e`, skipping the Keycloak
# realm bootstrap and the health check that is meant to report it.
systemctl enable --now nginx || warn "nginx failed to start - check 'journalctl -u nginx'"

# -------- 10d. Firewall --------
# Permanently allow HTTPS, plus HTTP for the redirect, so the UI is reachable
# out of the box. Nothing happens (and nothing is printed) unless
# firewall-cmd is installed and firewalld is active. Individual
# firewall-cmd failures are ignored.
if command -v firewall-cmd >/dev/null 2>&1 && systemctl is-active firewalld >/dev/null 2>&1; then
  say "Opening firewall (http, https)"
  for fw_service in https http; do
    firewall-cmd --permanent --add-service="$fw_service" >/dev/null 2>&1 || true
  done
  firewall-cmd --reload >/dev/null 2>&1 || true
fi


# -------- 10b. Bootstrap Keycloak realm + client + admin user --------
# Runs until it has succeeded once (marker: /etc/vexor/.kc_realm_seeded).
# Steps: wait for Keycloak, log kcadm in as the bootstrap admin (recreating
# that admin if the login fails), then create - each only if missing - the
# 'vexor' realm, the public 'vexor-ui' client, the three RBAC roles and the
# initial realm admin. The marker is written only after realm, client and
# user have been verified to exist.
if [ ! -f /etc/vexor/.kc_realm_seeded ]; then
  # Keycloak provisioning makes many kcadm calls that can transiently fail on a
  # just-started server; the realm_ok/client_ok/user_ok verify below decides
  # success and leaves the sentinel unset to retry. Do not let one transient
  # kcadm/pipefail failure abort the entire vexor-setup run.
  # (Strict mode is restored at the end of this block.)
  set +e; set +o pipefail
  say "Waiting for Keycloak to come online"
  KC_OK=0
  for i in $(seq 1 60); do
    code=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8180/auth/realms/master/.well-known/openid-configuration || echo 000)
    if [ "$code" = "200" ]; then KC_OK=1; break; fi
    sleep 2
  done
  if [ $KC_OK -eq 1 ]; then
    say "Provisioning Keycloak realm 'vexor'"
    KCADM=/opt/keycloak/bin/kcadm.sh
    # Ensure kcadm can write its config under keycloak's HOME; any stale
    # kcadm session state is discarded first.
    chown keycloak:keycloak /opt/keycloak 2>/dev/null || true
    rm -rf /opt/keycloak/.keycloak
    install -d -o keycloak -g keycloak /opt/keycloak/.keycloak
    # KC_RUN and KCADM are expanded UNQUOTED on purpose below: KC_RUN is a
    # command prefix that must word-split into "sudo -u keycloak HOME=...".
    # All operator-supplied values passed after them are quoted.
    KC_RUN="sudo -u keycloak HOME=/opt/keycloak"
    # shellcheck disable=SC1091
    . /etc/vexor/keycloak.env
    # Admin identity: values persisted in .initial-admin win; otherwise keep
    # whatever this run already set, falling back to admin / admin@localhost
    # and a generated password.
    [ -z "${VX_USER:-}" ] && VX_USER=admin
    [ -z "${VX_EMAIL:-}" ] && VX_EMAIL=admin@localhost
    [ -z "${VX_PW:-}" ] && VX_PW=$(genpw 16)
    [ -f /etc/vexor/.initial-admin ] && . /etc/vexor/.initial-admin
    VX_USER=${VEXOR_INITIAL_ADMIN_USER:-$VX_USER}
    VX_EMAIL=${VEXOR_INITIAL_ADMIN_EMAIL:-$VX_EMAIL}
    VX_PW=${VEXOR_INITIAL_ADMIN_PASSWORD:-$VX_PW}

    # kc_login - authenticate kcadm against the master realm with the
    # bootstrap admin credentials from keycloak.env. Returns kcadm's status.
    kc_login() {
      $KC_RUN $KCADM config credentials \
        --server http://127.0.0.1:8180/auth \
        --realm master \
        --user "$KC_BOOTSTRAP_ADMIN_USERNAME" \
        --password "$KC_BOOTSTRAP_ADMIN_PASSWORD" >/dev/null 2>&1
    }
    if ! kc_login; then
      # Recovery path: stop Keycloak, recreate the bootstrap admin offline
      # with kc.sh, start Keycloak again, wait for it, and retry the login.
      warn "kcadm login failed - recreating bootstrap admin"
      systemctl stop keycloak >/dev/null 2>&1 || :
      $KC_RUN env KC_BOOTSTRAP_ADMIN_PASSWORD="$KC_BOOTSTRAP_ADMIN_PASSWORD" \
        /opt/keycloak/bin/kc.sh bootstrap-admin user \
        --username "$KC_BOOTSTRAP_ADMIN_USERNAME" \
        --password:env KC_BOOTSTRAP_ADMIN_PASSWORD >/dev/null 2>&1 || :
      systemctl start keycloak >/dev/null 2>&1 || :
      for i in $(seq 1 60); do
        code=$(curl -s -o /dev/null -w "%{http_code}" http://127.0.0.1:8180/auth/realms/master/.well-known/openid-configuration || echo 000)
        [ "$code" = "200" ] && break
        sleep 2
      done
      kc_login || warn "kcadm login still failing after bootstrap-admin reset"
    fi

    if $KC_RUN $KCADM get realms/vexor >/dev/null 2>&1; then
      echo "  Realm 'vexor' already exists"
    else
      $KC_RUN $KCADM create realms \
        -s realm=vexor -s enabled=true \
        -s registrationAllowed=false \
        -s loginWithEmailAllowed=true \
        -s sslRequired=external >/dev/null && echo "  Realm 'vexor' created"
    fi

    # Public OIDC client for the UI (PKCE / authorization-code, no secret)
    # Restrict redirectUris/webOrigins to the actual server hostname/IPs instead
    # of "*"/"+" so a stolen vexor-ui client_id can't be reused on an attacker
    # domain to harvest tokens.
    # Both lists are assembled as JSON array text: FQDN and localhost first,
    # then every local address (IPv6 literals bracketed).
    KC_HOST_FQDN="$(hostname -f 2>/dev/null || hostname)"
    KC_REDIRECTS='['
    KC_ORIGINS='['
    first=1
    for u in "https://$KC_HOST_FQDN/*" "https://localhost/*"; do
      [ $first -eq 1 ] || { KC_REDIRECTS="$KC_REDIRECTS,"; KC_ORIGINS="$KC_ORIGINS,"; }
      KC_REDIRECTS="$KC_REDIRECTS\"$u\""
      # Origin = redirect URI without its trailing "/*".
      KC_ORIGINS="$KC_ORIGINS\"${u%/*}\""
      first=0
    done
    for ip in $(hostname -I 2>/dev/null | tr ' ' '\n' | grep -v '^$' | sort -u); do
      bip="$(bracket_host "$ip")"
      KC_REDIRECTS="$KC_REDIRECTS,\"https://$bip/*\""
      KC_ORIGINS="$KC_ORIGINS,\"https://$bip\""
    done
    KC_REDIRECTS="$KC_REDIRECTS]"
    KC_ORIGINS="$KC_ORIGINS]"

    if ! $KC_RUN $KCADM get clients -r vexor -q clientId=vexor-ui --fields id 2>/dev/null | grep -q '"id"'; then
      $KC_RUN $KCADM create clients -r vexor \
        -s clientId=vexor-ui \
        -s publicClient=true \
        -s standardFlowEnabled=true \
        -s directAccessGrantsEnabled=true \
        -s "redirectUris=$KC_REDIRECTS" \
        -s "webOrigins=$KC_ORIGINS" \
        -s protocol=openid-connect >/dev/null && echo "  Client 'vexor-ui' created"
    fi

    # Roles used by the UI for RBAC ("already exists" errors are ignored)
    for role in vexor-admin vexor-operator vexor-viewer; do
      $KC_RUN $KCADM create roles -r vexor -s "name=$role" >/dev/null 2>&1 || true
    done

    # Ensure the 'roles' client scope mappers also emit claims into the ID
    # token (Keycloak default only puts realm/client roles in the access
    # token, but our UI reads them from the ID token via react-oidc-context).
    SCOPE_ID=$($KC_RUN $KCADM get client-scopes -r vexor --fields id,name --format csv --noquotes 2>/dev/null | awk -F, '$2=="roles"{print $1}')
    if [ -n "$SCOPE_ID" ]; then
      for MNAME in "realm roles" "client roles"; do
        MID=$($KC_RUN $KCADM get "client-scopes/$SCOPE_ID/protocol-mappers/models" -r vexor 2>/dev/null \
          | python3 -c "import sys,json; ms=json.load(sys.stdin); print(next((m['id'] for m in ms if m.get('name')=='$MNAME'),''))" 2>/dev/null)
        if [ -n "$MID" ]; then
          $KC_RUN $KCADM update "client-scopes/$SCOPE_ID/protocol-mappers/models/$MID" -r vexor \
            -s 'config."id.token.claim"=true' \
            -s 'config."access.token.claim"=true' \
            -s 'config."userinfo.token.claim"=true' >/dev/null 2>&1 || true
        fi
      done
    fi

    # Initial admin user in the vexor realm. Username and e-mail may have
    # been typed by the operator, so they are always passed quoted: unquoted,
    # a space would split the argument and a '*' would glob-expand.
    if ! $KC_RUN $KCADM get users -r vexor -q "username=$VX_USER" --fields id 2>/dev/null | grep -q '"id"'; then
      $KC_RUN $KCADM create users -r vexor \
        -s "username=$VX_USER" -s "email=$VX_EMAIL" \
        -s enabled=true -s emailVerified=true >/dev/null
      $KC_RUN $KCADM set-password -r vexor --username "$VX_USER" --new-password "$VX_PW" >/dev/null
      $KC_RUN $KCADM add-roles -r vexor --uusername "$VX_USER" --rolename vexor-admin >/dev/null
      echo "  Realm admin '$VX_USER' created"
      if [ $NONINTERACTIVE -eq 1 ]; then
        echo "  -> login: $VX_USER  (password stored in /etc/vexor/.initial-admin)"
      fi
    fi

    # Validate that essential entities exist before declaring success. If any
    # of these are missing, keep the sentinel unset so a re-run retries the
    # bootstrap.
    realm_ok=0; client_ok=0; user_ok=0
    $KC_RUN $KCADM get realms/vexor >/dev/null 2>&1 && realm_ok=1
    $KC_RUN $KCADM get clients -r vexor -q clientId=vexor-ui --fields id 2>/dev/null | grep -q '"id"' && client_ok=1
    $KC_RUN $KCADM get users -r vexor -q "username=$VX_USER" --fields id 2>/dev/null | grep -q '"id"' && user_ok=1
    if [ $realm_ok -eq 1 ] && [ $client_ok -eq 1 ] && [ $user_ok -eq 1 ]; then
      touch /etc/vexor/.kc_realm_seeded
    else
      warn "Keycloak provisioning incomplete (realm=$realm_ok client=$client_ok user=$user_ok); will retry on next run"
    fi
  else
    warn "Keycloak not reachable after 120s - skipped realm setup. Re-run vexor-setup to retry."
  fi
  set -e; set -o pipefail
fi

# -------- 10c. Write OIDC env for vexor-api --------
# Rewritten on every run. Tokens issued by Keycloak carry
# iss=https://<server>/auth/realms/vexor when users log in via the UI, so
# that is what OIDC_ISSUER is set to; signing keys are fetched from the
# local Keycloak port. Both audience and issuer verification are switched
# OFF in the file written below (see the comments inside it).
VX_HOST="$(hostname -f 2>/dev/null || hostname)"
if [ -z "$VX_HOST" ]; then
  # No hostname at all: fall back to the first local address.
  VX_HOST="$(hostname -I 2>/dev/null | awk '{print $1}')"
fi
# Bracket IPv6 literals so https://[::1]/auth/... is valid
VX_HOST_URL="$(bracket_host "$VX_HOST")"
cat > /etc/vexor/oidc.env <<EOF
OIDC_ISSUER=https://${VX_HOST_URL}/auth/realms/vexor
OIDC_JWKS_URL=http://127.0.0.1:8180/auth/realms/vexor/protocol/openid-connect/certs
OIDC_AUDIENCE=account
# Audience verification is disabled because Keycloak's default access tokens
# carry aud=account; enable once an audience mapper for 'vexor-api' is added.
OIDC_VERIFY_AUD=0
# Issuer verification is disabled while the API is reached via a different
# external hostname than the internal Keycloak URL. Set to 1 in production
# deployments where the FQDN is stable.
OIDC_VERIFY_ISS=0
EOF
chmod 640 /etc/vexor/oidc.env
# Same ownership as the other env-files the API reads (db.env, cred_key,
# notify-token): with mode 0640 and owner root:root the vexor user could not
# read this file. Best effort, like the sibling files.
chown root:vexor /etc/vexor/oidc.env 2>/dev/null || :
say "Wrote /etc/vexor/oidc.env (issuer host: $VX_HOST_URL)"
systemctl daemon-reload 2>/dev/null || true
systemctl restart vexor-api 2>/dev/null || true

# -------- 11. Wait for the API to come online --------
# vexor-api creates its database tables (including personal_access_tokens,
# consumed by the bootstrap-PAT step below) on first startup, so poll its
# /healthz endpoint (30 attempts, 2s apart) before continuing. The initial
# Vexor admin is provisioned in Keycloak (section 10b); additional local
# accounts are created from the UI.
say "Waiting for API to come online"
api_up=0
for i in $(seq 1 30); do
  if curl -sf http://127.0.0.1:8080/healthz >/dev/null; then
    api_up=1
    break
  fi
  sleep 2
done
# Not fatal - the following steps cope with an unavailable API - but say so
# instead of timing out silently.
[ "$api_up" -eq 1 ] || warn "vexor-api did not answer /healthz within 60s - continuing anyway"

# -------- 11b. Bootstrap PAT for self-monitor --------
# Mint one admin-scoped Personal Access Token so the self-monitor installer
# (section 12) can authenticate against the API on a fresh install. Only the
# SHA-256 hash of the token is inserted into the database; the clear-text
# token goes into a root-only file. Skipped when that file already exists or
# when db.env is missing.
pat_file=/etc/vexor/.bootstrap-pat
if [ -f /etc/vexor/db.env ] && [ ! -f "$pat_file" ]; then
  # shellcheck disable=SC1091
  . /etc/vexor/db.env
  BOOT_PAT="vxp_$(openssl rand -hex 32)"
  BOOT_HASH=$(printf '%s' "$BOOT_PAT" | sha256sum | awk '{print $1}')
  pat_insert_sql="INSERT INTO personal_access_tokens (username,name,token_hash,scopes,created_at) VALUES ('vexor-self-monitor','self-monitor bootstrap','${BOOT_HASH}','vexor-admin',NOW());"
  if ! mysql -uvexor -p"${VEXOR_DB_PASSWORD}" vexor -e "$pat_insert_sql" 2>/dev/null; then
    warn "Could not mint bootstrap PAT (DB/table not ready); self-monitor will be skipped this run"
  else
    # The subshell keeps the restrictive umask from leaking into the script.
    ( umask 077; printf 'VEXOR_BOOTSTRAP_PAT=%s\n' "$BOOT_PAT" > "$pat_file" )
    chmod 600 "$pat_file"
    say "Issued bootstrap PAT for self-monitor"
  fi
fi

# -------- 12. Install self-monitoring host --------
# Run the self-monitor installer (when shipped) with the bootstrap PAT as
# VEXOR_TOKEN. Its output goes to /var/log/vexor/install-self-monitor.log.
# A failure only produces a warning.
if [ -x /usr/libexec/vexor/install-self-monitor.sh ]; then
  say "Installing vexor-self monitoring host with standard checks"
  if [ -f /etc/vexor/.bootstrap-pat ]; then
    # shellcheck disable=SC1091
    . /etc/vexor/.bootstrap-pat
    # Explicit if/else rather than `cmd && say || warn`: with the chained
    # form the failure warning would also fire if the success message itself
    # returned non-zero.
    if VEXOR_TOKEN="$VEXOR_BOOTSTRAP_PAT" /usr/libexec/vexor/install-self-monitor.sh \
         >/var/log/vexor/install-self-monitor.log 2>&1; then
      say "Self-monitor installed (29 standard checks on vexor-self)"
    else
      warn "Self-monitor install failed - see /var/log/vexor/install-self-monitor.log"
    fi
  else
    warn "No bootstrap PAT available; run /usr/libexec/vexor/install-self-monitor.sh manually after first login"
  fi
fi

# -------- 13. Post-setup health check --------
# Report the state of the core units and of the HTTPS front end. Any failure
# sets hc_fail=1, which decides the script's final exit status.
echo
say "Running post-setup health check"
hc_fail=0

# check_unit UNIT - print an OK/FAIL line for a systemd unit; a unit that is
# not active sets the global hc_fail to 1.
check_unit() {
  if systemctl is-active "$1" >/dev/null 2>&1; then
    echo -e "  ${GREEN}OK${RST}    $1"
  else
    echo -e "  ${RED}FAIL${RST}  $1   (journalctl -u $1)"
    hc_fail=1
  fi
}
for u in mariadb influxdb naemon keycloak vexor-api nginx; do check_unit "$u"; done

# On a connection failure curl still prints "000" for %{http_code} and then
# exits non-zero, so appending another "000" inside the substitution would
# report "HTTP 000000". Keep curl's own output and substitute 000 only when
# it printed nothing at all (e.g. curl is not installed).
http_code=$(curl -sk -o /dev/null -w '%{http_code}' "https://localhost/" 2>/dev/null) || true
[ -n "$http_code" ] || http_code=000
# Any 2xx/3xx, or 401/403, proves nginx is answering over TLS.
case "$http_code" in
  2*|3*|401|403) echo -e "  ${GREEN}OK${RST}    https://localhost/ reachable (HTTP $http_code)" ;;
  *)             echo -e "  ${RED}FAIL${RST}  https://localhost/ not reachable (HTTP $http_code)"; hc_fail=1 ;;
esac

# Final summary: URLs, account names and where the credentials are stored.
# Passwords are shown only "on screen", i.e. when stdout is a terminal. When
# the output is piped or captured (logs, journald, CI), the placeholder
# pointing at the root-only file is printed instead, so secrets do not end
# up in log files.
show_secrets=0
if [ -t 1 ]; then
  show_secrets=1
fi

VX_ADMIN_USER=""; VX_ADMIN_PW=""
if [ -f /etc/vexor/.initial-admin ]; then
  # shellcheck disable=SC1091
  . /etc/vexor/.initial-admin
  VX_ADMIN_USER="${VEXOR_INITIAL_ADMIN_USER:-}"
  if [ "$show_secrets" -eq 1 ]; then
    VX_ADMIN_PW="${VEXOR_INITIAL_ADMIN_PASSWORD:-}"
  fi
fi
KC_ADMIN_USER="admin"; KC_ADMIN_PW=""
if [ -f /etc/vexor/keycloak.env ]; then
  # shellcheck disable=SC1091
  . /etc/vexor/keycloak.env
  KC_ADMIN_USER="${KC_BOOTSTRAP_ADMIN_USERNAME:-admin}"
  if [ "$show_secrets" -eq 1 ]; then
    KC_ADMIN_PW="${KC_BOOTSTRAP_ADMIN_PASSWORD:-}"
  fi
fi
FQDN="$(hostname -f 2>/dev/null || hostname)"

echo
echo -e "${BOLD}${GREEN}=================================================="
echo -e "  Vexor setup complete"
echo -e "==================================================${RST}"
echo
echo -e "  ${BOLD}Vexor web UI${RST}"
echo    "    URL:       https://$FQDN/"
echo    "    Username:  ${VX_ADMIN_USER:-<see /etc/vexor/.initial-admin>}"
echo    "    Password:  ${VX_ADMIN_PW:-<see /etc/vexor/.initial-admin>}"
echo
echo -e "  ${BOLD}Keycloak admin console${RST}"
echo    "    URL:       https://$FQDN/auth/admin/"
echo    "    Username:  ${KC_ADMIN_USER}"
echo    "    Password:  ${KC_ADMIN_PW:-<see /etc/vexor/keycloak.env>}"
echo
echo -e "  ${YELLOW}Keep these safe.${RST} They are also stored in these root-only files:"
echo    "    /etc/vexor/.initial-admin   - Vexor admin username + password"
echo    "    /etc/vexor/keycloak.env     - Keycloak admin credentials"
echo    "    /etc/vexor/influx.env       - InfluxDB API token"
echo    "    /etc/vexor/db.env           - MariaDB 'vexor' user password"
echo
echo "  Re-run: vexor-setup           (interactive)"
echo "          vexor-setup --non-interactive"
echo
# Exit non-zero when the health check above flagged anything.
if [ $hc_fail -ne 0 ]; then
  warn "One or more components are not healthy yet. Check the units flagged FAIL above, then re-run: vexor-setup"
  exit 1
fi
