Files
multi-agent-paper/.agents/skills/multi-agent-mux-delegate-job/multi-agent-mux-delegate-job
T
2026-06-25 12:19:24 +09:00

278 lines
11 KiB
Bash
Executable File

#!/usr/bin/env bash
# multi-agent-mux-delegate-job — user-facing orchestrator for the multi-agent-mux-delegate-job skill.
#
# Subcommands:
#   submit  register a job, start the subscriber FIRST, then run the agent,
#           then (optionally) run a validation script.
#   status  show one job record.
#   list    list all jobs.
#   verify  run a user-supplied --validate script against a job's artifacts.
#   wait    block until all running/pending jobs reach a terminal state.
#   logs    show the persistent audit log of one job, or list them (--list).
#
# This is a reference wrapper: it shells out to the python scripts that live
# next to it (under scripts/). Copy it into your project and customise as
# needed. With --dry-run, submit only prints the instructions it would
# delegate; a real submit to a non-human agent needs tmux and an
# already-running agent session, and fails with an error otherwise.
#
# Strict mode: abort on failing commands, unset variables and failed pipeline stages.
set -euo pipefail
# Absolute directory containing this script; used to locate the helper scripts.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Pick an interpreter and print its path on stdout.
#
# Resolution order:
#   1. $DELEGATE_JOB_PYTHON, when set and non-empty
#   2. <WORKDIR>/.venv/bin/python (WORKDIR falls back to "." when unset)
#   3. ./.venv/bin/python relative to the current directory
#   4. python3 from PATH
#
# Globals:  DELEGATE_JOB_PYTHON, WORKDIR, SCRIPT_DIR (all read-only here)
# Outputs:  the chosen interpreter on stdout; diagnostics on stderr
# Exits:    1 when the interpreter cannot be executed or cannot import
#           paho.mqtt. Callers run this inside $(...), so the exit ends the
#           substitution and the non-zero status propagates via set -e.
pick_python() {
  local py_bin
  # Use the same fallback for the test and the assignment so an unset WORKDIR
  # can never trip `set -u` halfway through the selection.
  local workdir="${WORKDIR:-.}"

  if [[ -n "${DELEGATE_JOB_PYTHON:-}" ]]; then
    py_bin="$DELEGATE_JOB_PYTHON"
  elif [[ -x "$workdir/.venv/bin/python" ]]; then
    py_bin="$workdir/.venv/bin/python"
  elif [[ -x ".venv/bin/python" ]]; then
    py_bin="$(pwd)/.venv/bin/python"
  else
    py_bin="python3"
  fi

  # Distinguish "no such interpreter" from "interpreter lacks paho-mqtt";
  # otherwise a typo in DELEGATE_JOB_PYTHON is reported as a missing package.
  if ! command -v "$py_bin" >/dev/null 2>&1; then
    echo "ERROR: python interpreter '$py_bin' was not found or is not executable." >&2
    echo " Install python3, create a project .venv, or point DELEGATE_JOB_PYTHON at an interpreter." >&2
    exit 1
  fi

  if ! "$py_bin" -c "import paho.mqtt" 2>/dev/null; then
    echo "ERROR: paho-mqtt package is missing for $py_bin." >&2
    echo " Please create a virtual environment and install it:" >&2
    echo " python3 -m venv .venv && .venv/bin/pip install -r \"$SCRIPT_DIR/requirements.txt\"" >&2
    exit 1
  fi

  echo "$py_bin"
}
# Registry directory (a relative path) used when --registry-dir is not given.
REGISTRY_DIR_DEFAULT=".mam/jobs"
# Print the command-line synopsis to stdout, one line per printf argument.
usage() {
  printf '%s\n' \
    'multi-agent-mux-delegate-job <command> [options]' \
    'submit --agent <name> --prompt <text> [--workdir <dir>] [--agent-session <label>]' \
    '[--timeout <sec>] [--idle-timeout <sec>] [--validate <script>]' \
    '[--registry-dir <dir>] [--dry-run]' \
    '# The skill is tmux-interactive only; --mode print was removed.' \
    'status --job <id> [--registry-dir <dir>]' \
    'list [--registry-dir <dir>]' \
    'verify --job <id> --validate <script> [--registry-dir <dir>]' \
    'wait [--job <id>] [--timeout <sec>] [--registry-dir <dir>]' \
    'logs <job_id> | --list # persistent audit log (delegate_job_logs/)'
}
# ---- arg parsing helpers --------------------------------------------------
# Global option values with their defaults; parse_opts overwrites them from
# the command line and the cmd_* functions read them afterwards.
# WORKDIR defaults to the directory the script was started from.
AGENT="claude-code"; PROMPT=""; WORKDIR="$(pwd)"; AGENT_SESSION="tmux:claude"
# Timeouts are passed through to the registry and the subscriber unchanged.
TIMEOUT=3600; IDLE_TIMEOUT=120; VALIDATE=""; DRY_RUN=0
JOB_ID=""; REGISTRY_DIR="$REGISTRY_DIR_DEFAULT"
# Parse long options into the global option variables.
#
# Arguments: the option words of a subcommand ("$@" minus the subcommand).
# Globals written: AGENT, PROMPT, WORKDIR, AGENT_SESSION, TIMEOUT,
#                  IDLE_TIMEOUT, VALIDATE, JOB_ID, REGISTRY_DIR, DRY_RUN
# Exits: 1 (after printing usage) on an unknown option, or when an option
#        that takes a value is the last word on the command line.
parse_opts() {
  local opt
  while (( $# > 0 )); do
    opt="$1"

    # First pass: classify the option and validate its arity.
    case "$opt" in
      --dry-run)
        DRY_RUN=1
        shift
        continue
        ;;
      --agent | --prompt | --workdir | --agent-session | --timeout | \
        --idle-timeout | --validate | --job | --registry-dir)
        # Without this check "$2" aborts with a bare "unbound variable"
        # under `set -u`, which tells the user nothing about what went wrong.
        if (( $# < 2 )); then
          echo "option $opt requires a value" >&2
          usage
          exit 1
        fi
        ;;
      *)
        echo "unknown option: $opt" >&2
        usage
        exit 1
        ;;
    esac

    # Second pass: store the value (an empty string is a legal value).
    case "$opt" in
      --agent) AGENT="$2" ;;
      --prompt) PROMPT="$2" ;;
      --workdir) WORKDIR="$2" ;;
      --agent-session) AGENT_SESSION="$2" ;;
      --timeout) TIMEOUT="$2" ;;
      --idle-timeout) IDLE_TIMEOUT="$2" ;;
      --validate) VALIDATE="$2" ;;
      --job) JOB_ID="$2" ;;
      --registry-dir) REGISTRY_DIR="$2" ;;
    esac
    shift 2
  done
}
# submit: register a job, start its subscriber, delegate the prompt to the
# agent, wait for the subscriber to finish, then run the optional validation.
#
# Arguments: submit options (see usage); parsed by parse_opts.
# Globals:   reads the option globals and SCRIPT_DIR; writes PY, JOB_ID and
#            WORKDIR (normalised to an absolute path). Changes the current
#            directory to WORKDIR.
# Outputs:   progress on stdout. The LAST stdout line is always the
#            persistent audit-log directory of the job, so callers can scrape
#            it with `tail -n1`.
# Exits:     1 when --prompt is missing, the workdir cannot be entered or the
#            registry returns no job id.
cmd_submit() {
  parse_opts "$@"
  [[ -n "$PROMPT" ]] || { echo "submit requires --prompt" >&2; exit 1; }

  # Make WORKDIR absolute before anything derives a path from it. With a
  # relative --workdir both the venv interpreter picked by pick_python and the
  # audit-log path printed below would point at the wrong place after `cd`.
  local requested_workdir="$WORKDIR"
  if ! WORKDIR="$(cd "$requested_workdir" >/dev/null 2>&1 && pwd)"; then
    echo "submit: cannot enter workdir: $requested_workdir" >&2
    exit 1
  fi

  PY="$(pick_python)"
  cd "$WORKDIR"
  mkdir -p "$REGISTRY_DIR"

  # 1) register job (prints the new job id)
  JOB_ID="$("$PY" "$SCRIPT_DIR/scripts/registry.py" --registry-dir "$REGISTRY_DIR" register \
    --prompt "$PROMPT" --agent "$AGENT" --agent-session "$AGENT_SESSION" \
    --timeout "$TIMEOUT" --idle-timeout "$IDLE_TIMEOUT")"
  [[ -n "$JOB_ID" ]] || { echo "registry did not return a job id" >&2; exit 1; }
  echo "registered job: $JOB_ID"

  # 2) START THE SUBSCRIBER FIRST (ordering dependency — MQTT does not queue
  #    non-retained messages for absent subscribers).
  #    In dry-run nothing is delegated, so no subscriber is started at all;
  #    launching one would leave a background process waiting for events that
  #    never arrive.
  local logf="$REGISTRY_DIR/$JOB_ID.subscriber.out"
  local sub_pid=""
  if [[ "$DRY_RUN" != "1" ]]; then
    "$PY" "$SCRIPT_DIR/scripts/job_subscriber.py" --registry-dir "$REGISTRY_DIR" \
      --job "$JOB_ID" --timeout "$TIMEOUT" --idle-timeout "$IDLE_TIMEOUT" \
      >"$logf" 2>&1 &
    sub_pid=$!
    echo "subscriber pid: $sub_pid (log: $logf)"
    sleep 1 # give the subscriber time to CONNACK + SUBSCRIBE before the agent runs
  fi

  # 3) run the agent (or print the command for dry-run / missing binary)
  #    The publish command is pasted into the agent's shell, so every word is
  #    shell-quoted; paths without special characters come out unchanged.
  local pub
  printf -v pub '%q %q --registry-dir %q --job %q' \
    "$PY" "$SCRIPT_DIR/scripts/publish_event.py" "$REGISTRY_DIR" "$JOB_ID"
  # NOTE: the agent MUST use --job "$JOB_ID" (the one we just minted). Hard-coding
  # an id from an earlier session is the #1 reason a delegated job sits idle and
  # times out (see SKILL.md "Wrong job_id propagated to the agent"). We make the
  # freshness explicit in the instruction header.
  local instructions="Your job_id is \"$JOB_ID\" (the one just registered for THIS delegation — read it from the registry record, do NOT reuse any job_id you saw in earlier runs).
On start run: $pub --event started.
On permission/tool prompt run: $pub --event permission_required --detail '<tool>:<what>'.
On progress (optional): $pub --event progress --detail '<short status>'.
On success run: $pub --event completed --detail '<one-line summary>'.
On failure run: $pub --event error --detail '<one-line reason>'.
The subscriber for this job_id is already running; your completed/error event ends the job. Exit codes: 0 completed, 1 error, 2 publish failure.
Task: $PROMPT"
  run_agent "$JOB_ID" "$instructions"

  # 4) wait for the subscriber. run_agent only hands the prompt to the agent
  #    session and returns immediately; the subscriber exiting is the signal
  #    that the job has finished.
  if [[ -n "$sub_pid" ]]; then
    wait "$sub_pid" || true
    echo "subscriber output:"; cat "$logf" || true
  fi

  # 5) optional validation hook — deliberately AFTER the wait, so the script
  #    inspects the artifacts of a finished job rather than a job that was
  #    only just delegated. A failing validation is reported, not fatal.
  if [[ -n "$VALIDATE" ]]; then
    echo "running validation: $VALIDATE"
    local validate_rc=0
    JOB_ID="$JOB_ID" REGISTRY_DIR="$REGISTRY_DIR" bash "$VALIDATE" || validate_rc=$?
    if (( validate_rc == 0 )); then
      echo "validation: PASS"
    else
      echo "validation: FAIL (exit $validate_rc)"
    fi
  fi

  # Last stdout line: the persistent audit-log dir for this job (see SKILL.md
  # "Audit Logs"). Callers can scrape `tail -n1` to find it.
  local logs_root="${DELEGATE_JOB_LOGS_DIR:-$WORKDIR/delegate_job_logs}"
  echo "$logs_root/$JOB_ID"
}
# Hand the instructions to the agent.
#
# Arguments: $1 - job id, $2 - full instruction text for the agent
# Globals:   AGENT, AGENT_SESSION, DRY_RUN, TMUX_SERVER_NAME (optional),
#            PY, SCRIPT_DIR, REGISTRY_DIR (all read)
# Returns:   0 when the instructions were delivered (or only printed, for
#            the "human" agent and for dry-run); 1 when tmux or the target
#            session is missing.
#
# The skill is INTERACTIVE-ONLY. We never invoke `claude -p` or any other
# one-shot print mode, because:
#   - claude -p exits the moment stdin is drained, so there's nothing to
#     `tmux attach` to afterwards.
#   - fire-and-forget via wrapper defeats the whole point of the audit log
#     (you can't tell what happened if the agent crashes mid-turn).
#   - the job registry already gives us an authoritative completion signal,
#     so we don't need a wrapper-side exit code to know "done".
# The agent must already be running in the tmux session named by
# AGENT_SESSION (an optional "tmux:" prefix is stripped). The instructions
# are loaded into a named tmux buffer, pasted into that session and submitted
# with Enter; the user can `tmux attach -t <session>` to follow along.
run_agent() {
  local job_id="$1"
  local instructions="$2"

  if [[ "$AGENT" == "human" ]]; then
    echo "[human agent] complete the task, then run publish_event.py --event completed"
    return
  fi

  local sess="${AGENT_SESSION#tmux:}"
  if [[ "$DRY_RUN" == "1" ]]; then
    echo "[dry-run] would delegate task to running agent '$AGENT' in tmux session '$sess' with instructions:"
    echo "----"
    printf '%s\n' "$instructions"
    echo "----"
    return
  fi

  if ! command -v tmux >/dev/null 2>&1; then
    echo "ERROR: this skill requires tmux (interactive agent sessions)." >&2
    echo " Install with: brew install tmux (or your package manager)" >&2
    return 1
  fi

  # Keep the tmux invocation as an array so a TMUX_SERVER_NAME containing
  # whitespace or glob characters is passed to -L as exactly one argument.
  local -a tmux_cmd=(tmux)
  if [[ -n "${TMUX_SERVER_NAME:-}" ]]; then
    tmux_cmd+=(-L "$TMUX_SERVER_NAME")
  fi

  if ! "${tmux_cmd[@]}" has-session -t "$sess" 2>/dev/null; then
    echo "ERROR: 에이전트 세션 '$sess'이 존재하지 않습니다. 작업을 위임하기 전에 먼저 에이전트 세션을 기동해 주세요." >&2
    echo " 팁: 'multi-agent-mux-resume' 또는 'multi-agent-mux-create'를 통해 에이전트를 먼저 생성할 수 있습니다." >&2
    return 1
  fi

  # While the prompt is being delivered, a failing tmux command ends the
  # script (set -e). The EXIT trap then publishes an error event for the job.
  # The event is published against the same registry the job was registered
  # in, matching the publish command given to the agent.
  if [[ -n "${job_id:-}" && -n "${PY:-}" ]]; then
    local -a pub_cmd=("$PY" "$SCRIPT_DIR/scripts/publish_event.py")
    if [[ -n "${REGISTRY_DIR:-}" ]]; then
      pub_cmd+=(--registry-dir "$REGISTRY_DIR")
    fi
    pub_cmd+=(--job "$job_id")
    trap 'rc=$?; if [ "$rc" -ne 0 ]; then "${pub_cmd[@]}" --event error --detail "agent bootstrap failed (exit $rc)"; fi' EXIT
  fi

  echo "살아있는 에이전트 세션 '$sess'에 작업을 위임합니다..."
  local buf="job_buf_$job_id"
  "${tmux_cmd[@]}" set-buffer -b "$buf" "$instructions"
  "${tmux_cmd[@]}" paste-buffer -b "$buf" -t "$sess"
  "${tmux_cmd[@]}" send-keys -t "$sess" C-m # Enter: submit the pasted prompt
  "${tmux_cmd[@]}" delete-buffer -b "$buf"
  echo "작업이 세션 '$sess'에 전송되었습니다. (연결하려면: ${tmux_cmd[*]} attach -t $sess)"

  # Delivery succeeded; from here on the agent publishes its own events.
  trap - EXIT
}
# status: print the registry record of a single job.
# Requires --job; exits 1 with a message on stderr when it is missing.
cmd_status() {
  parse_opts "$@"
  if [[ -z "$JOB_ID" ]]; then
    echo "status requires --job" >&2
    exit 1
  fi
  PY="$(pick_python)"
  local -a registry_cmd=("$PY" "$SCRIPT_DIR/scripts/registry.py" --registry-dir "$REGISTRY_DIR")
  "${registry_cmd[@]}" get --job "$JOB_ID"
}
# list: print every job known to the registry.
cmd_list() {
  parse_opts "$@"
  PY="$(pick_python)"
  local registry_script="$SCRIPT_DIR/scripts/registry.py"
  "$PY" "$registry_script" --registry-dir "$REGISTRY_DIR" list
}
# verify: run the user-supplied validation script for a job.
# The script is started with bash and receives JOB_ID and REGISTRY_DIR in its
# environment. This command always exits: 0 on PASS, otherwise with the
# script's own exit status (1 when --job or --validate is missing).
cmd_verify() {
  parse_opts "$@"
  if [[ -z "$JOB_ID" ]]; then
    echo "verify requires --job" >&2
    exit 1
  fi
  if [[ -z "$VALIDATE" ]]; then
    echo "verify requires --validate <script>" >&2
    exit 1
  fi

  echo "verifying job $JOB_ID with $VALIDATE"
  local status=0
  JOB_ID="$JOB_ID" REGISTRY_DIR="$REGISTRY_DIR" bash "$VALIDATE" || status=$?
  if (( status == 0 )); then
    echo "verify: PASS (exit 0)"
    exit 0
  fi
  echo "verify: FAIL (exit $status)"
  exit "$status"
}
# logs <job_id> | logs --list — hands over to registry.py's `logs` CLI, which
# reads the persistent audit log under $DELEGATE_JOB_LOGS_DIR (or
# <cwd>/delegate_job_logs). Run from your project dir so the default resolves.
# Exits 1 when neither a job id nor --list is given.
cmd_logs() {
  PY="$(pick_python)"
  local target="${1:-}"
  local registry_script="$SCRIPT_DIR/scripts/registry.py"
  case "$target" in
    --list)
      "$PY" "$registry_script" logs --list
      ;;
    "")
      echo "logs requires <job_id> or --list" >&2
      exit 1
      ;;
    *)
      "$PY" "$registry_script" logs "$target"
      ;;
  esac
}
# wait: run the subscriber in the foreground, either for one job (--job) or,
# without --job, in --wait-any mode. --timeout is passed through in both cases.
cmd_wait() {
  parse_opts "$@"
  PY="$(pick_python)"
  local -a selector=(--wait-any)
  if [[ -n "$JOB_ID" ]]; then
    selector=(--job "$JOB_ID")
  fi
  "$PY" "$SCRIPT_DIR/scripts/job_subscriber.py" --registry-dir "$REGISTRY_DIR" \
    "${selector[@]}" --timeout "$TIMEOUT"
}
# Dispatch the first argument to the matching cmd_* function and pass the
# remaining arguments through. No argument or a help flag prints usage;
# anything else is reported on stderr, followed by usage and exit status 1.
main() {
  local sub="${1:-}"
  if (( $# > 0 )); then
    shift
  fi

  case "$sub" in
    "" | -h | --help | help)
      usage
      ;;
    submit | status | list | verify | wait | logs)
      "cmd_$sub" "$@"
      ;;
    *)
      echo "unknown command: $sub" >&2
      usage
      exit 1
      ;;
  esac
}
# Script entry point: forward all command-line arguments to the dispatcher.
main "$@"