#!/bin/bash
# vexor_check_systemd_unit — Nagios/NRPE plugin: reports systemd unit state
# AND how long the unit has been in that state (e.g. running for 2d 5h).
#
# Usage: vexor_check_systemd_unit <unit-name>
#   OK       - unit is active
#   WARNING  - unit is activating / reloading / deactivating
#   CRITICAL - unit is inactive / failed / missing (not found)
#   UNKNOWN  - usage error, systemctl not found, or an unrecognised state
#
# Perfdata: state=<0|1|2|3>, age_seconds=<secs>
# Treat expansion of an unset variable as an error.
set -u

# NRPE often runs plugins with a minimal/empty PATH; set a sane one so
# systemctl/date/etc. are found regardless of the agent's environment.
# The inherited PATH is appended only when it is non-empty: joining an empty
# value with ":" would leave a trailing colon, i.e. an empty PATH element,
# which the shell resolves as the current working directory.
export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin${PATH:+:${PATH}}"

# The unit to inspect is taken from the first positional argument. Without
# one there is nothing to check, so print a usage hint and exit UNKNOWN (3).
UNIT="${1:-}"
[ -n "$UNIT" ] || {
    echo "UNKNOWN: usage: $(basename "$0") <unit>"
    exit 3
}

# Locate systemctl: try the bare name (resolved through PATH) first, then a
# few conventional absolute locations. The first candidate that resolves
# wins and its path is stored in SYSTEMCTL.
SYSTEMCTL=""
for candidate in \
    systemctl \
    /usr/bin/systemctl \
    /bin/systemctl \
    /usr/sbin/systemctl \
    /sbin/systemctl
do
    # `command -v` prints the resolved path and succeeds only when the
    # candidate can be run as a command.
    if resolved=$(command -v "$candidate" 2>/dev/null); then
        SYSTEMCTL=$resolved
        break
    fi
    # Otherwise accept the candidate as-is if it is an executable path.
    if [ -x "$candidate" ]; then
        SYSTEMCTL=$candidate
        break
    fi
done

# Nothing usable found: the unit cannot be inspected, so exit UNKNOWN (3).
[ -n "$SYSTEMCTL" ] || {
    echo "UNKNOWN: systemctl not found (no systemd, or PATH restricted)"
    exit 3
}

# Ask systemd for the unit's activity state, sub-state and load state.
# "--" ends option parsing so a unit name beginning with "-" is never
# mistaken for a systemctl option.

# `is-active` prints the activity state; stderr is folded in and only the
# first line is kept, so STATE is always a single line of text.
STATE=$("$SYSTEMCTL" is-active -- "$UNIT" 2>&1 | head -n1)

# `show -p Prop` prints "Prop=value". The prefix is stripped here rather than
# with `--value`, because older systemctl releases reject that switch; the
# resulting empty output would make every unit look "not found" below.
SUB=$("$SYSTEMCTL" show -p SubState -- "$UNIT" 2>/dev/null)
SUB=${SUB#*=}
LOAD=$("$SYSTEMCTL" show -p LoadState -- "$UNIT" 2>/dev/null)
LOAD=${LOAD#*=}

# A unit systemd does not know about is CRITICAL. An empty LoadState (the
# query itself produced no value) is deliberately treated the same way.
if [ "$LOAD" = "not-found" ] || [ -z "$LOAD" ]; then
    echo "CRITICAL: $UNIT not found on this host | state=2;;;; age_seconds=0;;;;"
    exit 2
fi

# --- Since when has the unit been in its current state? ---------------------
# Produces two values for the rest of the script:
#   TS     the timestamp string reported by systemd, "" if none is usable
#   EPOCH  that instant in seconds since the Unix epoch, 0 if unknown

# Print the timestamp at which $UNIT entered the activity state named in $1,
# or nothing when systemd has no usable value.
#
# systemd records one stamp per transition: InactiveExit (inactive/failed ->
# activating), ActiveEnter (activating -> active), ActiveExit (active ->
# deactivating) and InactiveEnter (deactivating -> inactive/failed). The
# stamp wanted is the one that OPENS the current state. Candidates are tried
# in order; the first value that is neither empty nor "n/a" wins.
#
# Reads: SYSTEMCTL, UNIT.
_vexor_state_since() {
    local -a candidates
    local prop raw
    case "${1:-}" in
        active)       candidates=(ActiveEnterTimestamp) ;;
        activating)   candidates=(InactiveExitTimestamp) ;;
        deactivating) candidates=(ActiveExitTimestamp) ;;
        # No transition stamp marks the start of a reload; the generic
        # last-state-change stamp is the closest available, with the time
        # the unit became active as a fallback.
        reloading)    candidates=(StateChangeTimestamp ActiveEnterTimestamp) ;;
        # inactive, failed and anything unrecognised.
        *)            candidates=(InactiveEnterTimestamp ActiveExitTimestamp) ;;
    esac
    for prop in "${candidates[@]}"; do
        # `show -p Prop` prints "Prop=value"; strip the prefix here instead
        # of relying on `--value`, which older systemctl releases reject.
        raw=$("$SYSTEMCTL" show -p "$prop" -- "$UNIT" 2>/dev/null)
        raw=${raw#*=}
        if [ -n "$raw" ] && [ "$raw" != "n/a" ]; then
            printf '%s\n' "$raw"
            return 0
        fi
    done
    return 0
}

# Print the timestamp string $1 as seconds since the Unix epoch. Prints 0
# when the string is empty, "n/a", rejected by `date -d`, or does not convert
# to a plain non-negative integer, so callers can compare it numerically.
_vexor_stamp_to_epoch() {
    local stamp=${1:-} secs
    if [ -z "$stamp" ] || [ "$stamp" = "n/a" ]; then
        echo 0
        return 0
    fi
    secs=$(date -d "$stamp" +%s 2>/dev/null) || secs=0
    case "$secs" in
        '' | *[!0-9]*) secs=0 ;;
    esac
    echo "$secs"
}

TS=$(_vexor_state_since "$STATE")
EPOCH=$(_vexor_stamp_to_epoch "$TS")

# Age of the current state in whole seconds. It stays 0 unless EPOCH is a
# positive number; a difference that comes out negative (EPOCH later than
# now) is clamped back to 0.
SECS=0
if [ "$EPOCH" -gt 0 ]; then
    NOW=$(date +%s)
    SECS=$(( NOW - EPOCH ))
    if [ "$SECS" -lt 0 ]; then
        SECS=0
    fi
fi

# Human-readable duration: the largest unit that fits plus the next one down
# (seconds alone below one minute).
if [ "$SECS" -lt 60 ]; then
    DUR="${SECS}s"
elif [ "$SECS" -lt 3600 ]; then
    DUR="$(( SECS / 60 ))m $(( SECS % 60 ))s"
elif [ "$SECS" -lt 86400 ]; then
    DUR="$(( SECS / 3600 ))h $(( SECS % 3600 / 60 ))m"
else
    DUR="$(( SECS / 86400 ))d $(( SECS % 86400 / 3600 ))h"
fi

# Map the activity state to the plugin's exit status:
#   0 OK, 1 WARNING, 2 CRITICAL, 3 UNKNOWN (any state not listed).
case "$STATE" in
    active)                            status=0 ;;
    activating|reloading|deactivating) status=1 ;;
    inactive|failed)                   status=2 ;;
    *)                                 status=3 ;;
esac

# Emit one status line followed by perfdata (state code and age in seconds),
# then exit with the matching status. The UNKNOWN line carries no duration.
if [ "$status" -eq 0 ]; then
    printf 'OK: %s is %s (%s) for %s | state=0;;;; age_seconds=%d;;;;\n' \
        "$UNIT" "$STATE" "$SUB" "$DUR" "$SECS"
elif [ "$status" -eq 1 ]; then
    printf 'WARNING: %s is %s (%s) for %s | state=1;;;; age_seconds=%d;;;;\n' \
        "$UNIT" "$STATE" "$SUB" "$DUR" "$SECS"
elif [ "$status" -eq 2 ]; then
    printf 'CRITICAL: %s is %s (%s) for %s | state=2;;;; age_seconds=%d;;;;\n' \
        "$UNIT" "$STATE" "$SUB" "$DUR" "$SECS"
else
    printf 'UNKNOWN: %s state=%s (sub=%s) | state=3;;;; age_seconds=%d;;;;\n' \
        "$UNIT" "$STATE" "$SUB" "$SECS"
fi
exit "$status"
