#!/usr/bin/env bash
# watchdog.sh — helper script for multi-agent-mux-monitor
#
# Metadata for SKILL.md:
# description: "Watchdog helper that keeps subscriber alive and exits when JOB is done"
# usage: "watchdog.sh <job_id> <workdir> [--help]"

# Argument handling.
#   -h / --help as the first argument: print usage on stdout and exit 0.
#   Fewer than two arguments: print usage on stderr and exit 2, so a caller
#   that forgot <job_id> or <workdir> can tell the watchdog never started.
if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
  echo "Usage: $0 <job_id> <workdir>"
  exit 0
fi

if (( $# < 2 )); then
  echo "Usage: $0 <job_id> <workdir>" >&2
  exit 2
fi

# Job identity and paths taken from the two positional arguments.
# Subscriber logs are written under <workdir>/.mam/jobs.
JOB_ID="$1"
WORKDIR="$2"
LOG_DIR="$WORKDIR/.mam/jobs"

# Create the log directory up front. If it cannot be created, no subscriber
# log file could ever be opened, so stop here instead of retrying forever.
if ! mkdir -p "$LOG_DIR"; then
  echo "watchdog: cannot create log directory: $LOG_DIR" >&2
  exit 1
fi

# Print a message on stdout prefixed with the current UTC time,
# e.g. "[2024-01-02T03:04:05Z] message".
# Arguments: the message; multiple arguments are joined via "$*".
log() {
  printf '[%s] %s\n' "$(date -u +'%Y-%m-%dT%H:%M:%SZ')" "$*"
}

# Main watchdog loop: poll the job registry, exit once the job reports a
# terminal status, otherwise run one time-limited subscriber and poll again.

# Print the "status" field reported by registry.py for JOB_ID, or "unknown"
# when the lookup or the JSON parsing fails. Runs in a subshell so the cd
# does not change the watchdog's own working directory.
get_job_status() {
  (
    cd "$WORKDIR" || exit 1
    .venv/bin/python .agents/skills/multi-agent-mux-delegate-job/scripts/registry.py \
      get --job "$JOB_ID" 2>/dev/null \
      | python3 -c '
import sys, json
try:
    data = json.load(sys.stdin)
    print(data.get("status", "unknown"))
except Exception:
    print("unknown")
' 2>/dev/null
  ) || echo "unknown"
}

log "watchdog started for JOB=$JOB_ID workdir=$WORKDIR"

while true; do
  # 1) Get current job status.
  STATUS=$(get_job_status)
  log "JOB status: $STATUS"

  # 2) Terminal check: stop watching once the job reports one of these states.
  case "$STATUS" in
    completed | error | permission_required)
      log "JOB reached terminal state ($STATUS), watchdog exiting"
      exit 0
      ;;
  esac

  # 3) Start subscriber (2min hard limit); each run gets its own log file.
  LOG_FILE="$LOG_DIR/subscriber-${JOB_ID}-$(date +%s).log"
  log "starting subscriber (2min hard limit, log: $LOG_FILE)"

  # The redirect is attached to the whole subshell so the log file is opened
  # before the cd; otherwise a relative <workdir> would be applied twice to
  # LOG_FILE. The subshell exits with the subscriber's status (124 when
  # `timeout` had to stop it) instead of the status of the trailing echo, so
  # the code logged after `wait` is the real one.
  (
    cd "$WORKDIR" \
      && timeout 120 .venv/bin/python \
        .agents/skills/multi-agent-mux-delegate-job/scripts/job_subscriber.py \
        --job "$JOB_ID" --timeout 120 --idle-timeout 999999 --registry-dir .mam/jobs
    rc=$?
    echo "[$(date -u +'%Y-%m-%dT%H:%M:%SZ')] subscriber exited"
    exit "$rc"
  ) >"$LOG_FILE" 2>&1 &

  SUB_PID=$!
  log "subscriber PID=$SUB_PID"

  # 4) Wait for subscriber to exit or timeout.
  wait "$SUB_PID" 2>/dev/null
  EXIT_CODE=$?
  log "subscriber exited code=$EXIT_CODE"

  # Short pause so a subscriber that fails immediately does not spin the loop.
  sleep 1
done