feat(monitor): consolidate per-job watchdogs into shared wildcard subscriber (FW-W3)
This commit is contained in:
+18
-6
@@ -723,16 +723,28 @@ delegate_publish_event() {
|
||||
# start_watchdog JOB_ID [WORKDIR]
#
# Ensure the shared wildcard monitor subscriber (reconcile.sh --subscribe) is
# running for WORKDIR, starting it if necessary, and print its PID.
#
# Arguments:
#   $1 - job id. Accepted so existing callers keep working; the subscriber is
#        shared across jobs, so the value is not used here.
#   $2 - workspace directory (defaults to the current directory).
# Outputs:
#   stdout - PID of the running (or newly started) subscriber.
#   stderr - an error message when the subscriber cannot be started.
# Returns:
#   0 on success, 1 if the monitor script is missing or the log directory
#   cannot be created.
start_watchdog() {
  local workdir="${2:-$PWD}"
  local monitor_script="$workdir/.agents/skills/multi-agent-mux-monitor/scripts/reconcile.sh"
  local log_file="$workdir/.mam/multi-agent-mux-monitor.log"

  if [ ! -f "$monitor_script" ]; then
    echo "ERROR: monitor script not found: $monitor_script" >&2
    return 1
  fi

  # pgrep -f interprets its pattern as an extended regex, so escape regex
  # metacharacters; otherwise a workdir containing e.g. '+' or '(' never
  # matches and every call would spawn a duplicate subscriber.
  local pattern
  pattern=$(printf '%s' "bash $monitor_script --subscribe" \
    | sed 's/[][\.*^$+?(){}|]/\\&/g')

  # Check if reconcile.sh --subscribe is already running on this workspace.
  # -o keeps the output to a single PID (the oldest match) even if several
  # subscribers exist. pgrep exits non-zero when nothing matches, which is an
  # expected outcome here, hence the '|| true'.
  local pid
  pid=$(pgrep -o -f "$pattern" || true)

  if [ -z "$pid" ]; then
    # The append redirection below fails if the log directory is missing.
    if ! mkdir -p -- "${log_file%/*}"; then
      echo "ERROR: cannot create log directory: ${log_file%/*}" >&2
      return 1
    fi

    # Start the wildcard monitor subscriber daemon with --idle-timeout 0
    # (never idle out), with $workdir as its cwd to anchor relative log paths.
    # The cd happens inside a subshell so the caller's working directory is
    # never touched; 'exec' makes the daemon reuse the subshell's PID, so $!
    # is the daemon's PID.
    (
      cd "$workdir" \
        && exec nohup bash "$monitor_script" --subscribe --idle-timeout 0
    ) >> "$log_file" 2>&1 &
    pid=$!
  fi

  echo "$pid"
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user