refactor(skills): cleanup dead code + full workflow A→B→C→D integration
Cleanup: - Remove unused validate_yaml() helper from lib.sh - Remove USER_MANUAL.html + mqtt-broker-setup.html (no refs found) Workflow A (create_session ↔ delegate-job): - Add --submit-job <prompt> option to create_session.sh - Auto-register session in delegate-job registry, store delegate_job_id in YAML Workflow B (push-based monitor): - Migrate reconcile.sh to MQTT subscriber mode (polling fallback preserved) Workflow C (unified status): - status.sh now shows session + delegate-job state in single column Workflow D (audit log + perms): - JSON job files chmod 600 - create/delete/resume now publish lifecycle events to delegate-job
This commit is contained in:
@@ -98,11 +98,12 @@ bash ~/PuKi/lab/agent_sessions/skills/agent-sessions-monitor/scripts/reconcile.s
|
||||
|
||||
# Read-only: compute drift WITHOUT writing the YAML (use for "what's running?" checks).
|
||||
bash ~/PuKi/lab/agent_sessions/skills/agent-sessions-monitor/scripts/reconcile.sh --once --emit-diff --dry-run
|
||||
|
||||
# Push-based MQTT Monitor: listen to delegated job events on the broker and update the YAML instantly.
|
||||
bash ~/PuKi/lab/agent_sessions/skills/agent-sessions-monitor/scripts/reconcile.sh --subscribe
|
||||
```
|
||||
|
||||
Flags: `--once` (single pass), `--emit-diff` (print JSON), `--dry-run` (P1-E — no
|
||||
mutation). There are **no** `--workspace` / `--agent` / `--comment-card` flags; the
|
||||
worker turns the emitted JSON `drifts[]` into `kanban_comment` calls itself.
|
||||
Flags: `--once` (single pass), `--emit-diff` (print JSON), `--dry-run` (P1-E — no mutation), `--subscribe` (persistent push-based MQTT subscription monitoring; the script exits when the subscriber stops and does not switch to polling by itself, so use the polling modes above as the fallback when the broker is unreachable). There are **no** `--workspace` / `--agent` / `--comment-card` flags; the worker turns the emitted JSON `drifts[]` into `kanban_comment` calls itself.
|
||||
|
||||
## Drift classes (what the script handles)
|
||||
|
||||
|
||||
@@ -21,18 +21,141 @@ STATE_DIR="${AGENT_SESSIONS_STATE_DIR:-$HOME/.cache/agent-sessions-monitor}"
|
||||
ONCE=0
|
||||
EMIT_DIFF=0
|
||||
DRY_RUN=0
|
||||
SUBSCRIBE=0
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
--once) ONCE=1; shift ;;
|
||||
--emit-diff) EMIT_DIFF=1; shift ;;
|
||||
--dry-run) DRY_RUN=1; shift ;;
|
||||
-h|--help) echo "Usage: $0 [--once] [--emit-diff] [--dry-run]"; exit 0 ;;
|
||||
--subscribe) SUBSCRIBE=1; shift ;;
|
||||
-h|--help) echo "Usage: $0 [--once] [--emit-diff] [--dry-run] [--subscribe]"; exit 0 ;;
|
||||
*) echo "ERROR: unknown arg: $1" >&2; exit 2 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
[ -f "$AGENT_SESSIONS_YAML" ] || { echo "ERROR: $AGENT_SESSIONS_YAML not found" >&2; exit 1; }
|
||||
|
||||
if [ "$SUBSCRIBE" = "1" ]; then
|
||||
SUBSCRIBE_MODE=1 env_python "$AGENT_SESSIONS_YAML" <<'PYEOF'
|
||||
import os, sys, json, fcntl, tempfile, subprocess
|
||||
from datetime import datetime, timezone
|
||||
import yaml
|
||||
|
||||
yaml_path = os.environ['YAML_PATH']
|
||||
home = os.environ['HOME_DIR']
|
||||
|
||||
# Add skills/delegate-job/scripts to path to import mqtt_common
|
||||
# Make the delegate-job helper scripts importable so that `import mqtt_common`
# (performed right after this block) can succeed.
#
# When the script is fed to the interpreter on stdin there is no __file__, so
# the upward search starts from the current working directory instead.
script_dir = os.path.dirname(os.path.abspath(__file__)) if '__file__' in globals() else os.getcwd()

# NOTE(review): machine-specific absolute path; it is tried first and only
# works on the original author's host. Kept for backward compatibility.
path_candidate = os.path.join('/home/godopu16/PuKi/laa/canary_projects/advanced_multi_agent', 'skills', 'delegate-job', 'scripts')

if os.path.isdir(path_candidate):
    sys.path.append(path_candidate)
else:
    # Walk from script_dir towards the filesystem root and take the first
    # ancestor that contains either layout of the delegate-job scripts.
    d = script_dir
    while d != '/' and d:
        layouts = (
            os.path.join(d, 'skills', 'delegate-job', 'scripts'),
            os.path.join(d, 'delegate-job', 'scripts'),
        )
        match = next((c for c in layouts if os.path.isdir(c)), None)
        if match is not None:
            sys.path.append(match)
            break
        d = os.path.dirname(d)
    else:
        # Nothing found: the import may still work through PYTHONPATH, so do
        # not abort here; just leave a hint for whoever reads the traceback.
        print("MQTT Monitor: delegate-job scripts directory not found; "
              "relying on the existing sys.path", file=sys.stderr, flush=True)
|
||||
|
||||
import mqtt_common
|
||||
|
||||
cfg = mqtt_common.broker_config_from_env()
|
||||
client = mqtt_common.make_client("monitor_sub", cfg)
|
||||
|
||||
def on_message(client, userdata, msg):
    """Handle one message received on the job-events subscription.

    The payload is decoded as UTF-8 JSON and must be an object carrying
    ``job_id`` and ``event``. Only the terminal events ``completed`` and
    ``error`` trigger a session update; every other message is ignored.

    Parsing problems and update problems are reported separately so that a
    failed YAML update is not mislabelled as a malformed message. Nothing is
    raised to the caller.

    Args:
        client: MQTT client that delivered the message (unused).
        userdata: Client user data (unused).
        msg: Message object exposing a bytes ``payload`` attribute.
    """
    try:
        payload = json.loads(msg.payload.decode("utf-8"))
    except (ValueError, AttributeError, TypeError) as e:
        # ValueError covers both UnicodeDecodeError and json.JSONDecodeError.
        print(f"MQTT Monitor error parsing message: {e}", flush=True)
        return

    if not isinstance(payload, dict):
        print("MQTT Monitor error parsing message: payload is not a JSON object", flush=True)
        return

    jid = payload.get("job_id")
    event = payload.get("event")
    if not jid or not event:
        return
    if event not in ("completed", "error"):
        return

    print(f"MQTT Monitor: received terminal event {event} for job {jid}", flush=True)
    try:
        update_session_by_job(jid, event)
    except Exception as e:
        # Callback boundary: keep the subscriber loop alive, but say what failed.
        print(f"MQTT Monitor error handling {event} event for job {jid}: {e}", flush=True)
|
||||
|
||||
def update_session_by_job(jid, event):
    """Mark running sessions bound to a delegated job as terminated.

    While holding an exclusive ``flock`` on ``<yaml_path>.lock``, the YAML file
    is loaded, every ``tmux_sessions`` row whose ``delegate_job_id`` equals
    ``jid`` and whose ``status`` is ``running`` is flipped to ``terminated``
    (with termination timestamps and mode), its tmux session is killed, and
    the file is rewritten through a temp file + ``os.replace``.

    Args:
        jid: Job id taken from the MQTT event.
        event: Terminal event name; recorded in ``termination_mode``.
    """
    lock_path = yaml_path + '.lock'
    # The context manager guarantees the handle is closed even if flock fails.
    with open(lock_path, 'w') as lock_fh:
        fcntl.flock(lock_fh, fcntl.LOCK_EX)
        try:
            if os.path.exists(yaml_path):
                with open(yaml_path) as f:
                    d_local = yaml.safe_load(f) or {}
            else:
                d_local = {}

            if not isinstance(d_local, dict):
                print("MQTT Monitor error writing YAML: top-level is not a mapping", flush=True)
                return

            sessions = d_local.get('tmux_sessions') or []
            if not isinstance(sessions, list):
                print("MQTT Monitor error writing YAML: tmux_sessions is not a list", flush=True)
                return

            updated = False
            for s in sessions:
                if not isinstance(s, dict):
                    continue
                if s.get('delegate_job_id') == jid and s.get('status') == 'running':
                    # One clock reading so the ISO string and the epoch agree.
                    now = datetime.now(timezone.utc)
                    s['status'] = 'terminated'
                    s['terminated_at'] = now.strftime('%Y-%m-%dT%H:%M:%SZ')
                    s['terminated_at_epoch'] = int(now.timestamp())
                    s['termination_mode'] = f"auto-detected (MQTT {event})"
                    name = s.get('name')
                    srv = s.get('tmux_server') or 'default'
                    kill_tmux_session(name, srv)
                    updated = True

            if not updated:
                return

            # Write next to the target so os.replace stays on one filesystem.
            dir_ = os.path.dirname(yaml_path) or '.'
            fd, tmp = tempfile.mkstemp(dir=dir_, prefix='.agent-sessions.', suffix='.tmp')
            try:
                with os.fdopen(fd, 'w') as f:
                    yaml.safe_dump(d_local, f, default_flow_style=False, sort_keys=False,
                                   allow_unicode=True, width=4096)
                os.replace(tmp, yaml_path)
                print(f"MQTT Monitor: updated YAML for job {jid} to terminated", flush=True)
            except Exception as e:
                # Best effort: drop the partial temp file and report.
                if os.path.exists(tmp):
                    os.remove(tmp)
                print(f"MQTT Monitor error writing YAML: {e}", flush=True)
        finally:
            fcntl.flock(lock_fh, fcntl.LOCK_UN)
|
||||
|
||||
def kill_tmux_session(name, srv):
    """Kill the tmux session ``name`` and report the real outcome.

    Runs ``tmux [-L <srv>] kill-session -t <name>``; the ``-L`` option is only
    added when ``srv`` is not ``'default'``. Success is reported only when tmux
    exits with status 0; otherwise the exit status and tmux's stderr are
    printed. Never raises.

    Args:
        name: tmux session name; a falsy value is rejected without running tmux.
        srv: tmux server name, or ``'default'`` for the default server.
    """
    if not name:
        print("MQTT Monitor error killing tmux: session has no name", flush=True)
        return

    cmd = ['tmux']
    if srv != 'default':
        cmd += ['-L', srv]
    cmd += ['kill-session', '-t', name]

    try:
        result = subprocess.run(cmd, capture_output=True)
    except (OSError, ValueError, TypeError) as e:
        # OSError: tmux binary missing / not executable.
        print(f"MQTT Monitor error killing tmux: {e}", flush=True)
        return

    if result.returncode == 0:
        print(f"MQTT Monitor: killed tmux session {name} on server {srv}", flush=True)
    else:
        detail = result.stderr.decode('utf-8', 'replace').strip()
        print(f"MQTT Monitor: tmux kill-session failed for {name} on server {srv} "
              f"(rc={result.returncode}): {detail}", flush=True)
|
||||
|
||||
client.on_message = on_message
|
||||
|
||||
def on_connect(_c, _u, _flags, reason_code, _props):
    """Subscribe to the job-events topic once the broker accepts the connection.

    The reason code is normalised through ``mqtt_common.reason_code_value``;
    a value of 0 is treated as success, anything else is only reported.

    Args:
        _c: Connected MQTT client; used to issue the subscription.
        _u: Client user data (unused).
        _flags: Connection flags (unused).
        reason_code: Connection result handed over by the MQTT client.
        _props: Connection properties (unused).
    """
    code = mqtt_common.reason_code_value(reason_code)
    if code != 0:
        print(f"MQTT Monitor connection failed: {code}", flush=True)
        return

    # '+' is a single-level wildcard: one subscription covers every job id.
    _c.subscribe("python/mqtt/jobs/+/events", qos=1)
    print("MQTT Monitor: subscribed to python/mqtt/jobs/+/events", flush=True)
|
||||
|
||||
client.on_connect = on_connect
|
||||
print(f"MQTT Monitor: connecting to {cfg.host}:{cfg.port} (TLS={cfg.tls})...", flush=True)
|
||||
client.connect(cfg.host, cfg.port, cfg.keepalive)
|
||||
client.loop_forever()
|
||||
PYEOF
|
||||
exit 0
|
||||
fi
|
||||
|
||||
mkdir -p "$STATE_DIR"
|
||||
|
||||
# 모든 비교 로직을 단일 소스로 둔다. dry-run 은 env_python(읽기전용), 그 외엔
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -260,6 +260,10 @@ def _atomic_write_record(job_id: str, registry_dir: str, record: Dict[str, Any])
|
||||
fh.flush()
|
||||
os.fsync(fh.fileno())
|
||||
os.replace(tmp, path)
|
||||
try:
|
||||
os.chmod(path, 0o600)
|
||||
except Exception:
|
||||
pass
|
||||
except BaseException:
|
||||
if os.path.exists(tmp):
|
||||
os.unlink(tmp)
|
||||
|
||||
+36
-41
@@ -7,7 +7,6 @@
|
||||
# - atomic_dump_yaml : flock + temp+rename + .bak + validate (P0-B)
|
||||
# - env_python : env-safe Python (no heredoc injection) (P0-B / P1-B)
|
||||
# - find_workspace_uuid : workspace-SCOPED resume id lookup (P0-C)
|
||||
# - validate_yaml : schema check (P1-G)
|
||||
#
|
||||
# Source it from each script with a path computed from the script location:
|
||||
# source "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/lib.sh"
|
||||
@@ -256,46 +255,6 @@ finally:
|
||||
PYEOF
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# validate_yaml [yaml_path]
|
||||
#
|
||||
# Schema check (P1-G). Exits non-zero with an actionable message on failure.
|
||||
# Safe to call as a preflight in any mutator.
|
||||
# ---------------------------------------------------------------------------
|
||||
validate_yaml() {
|
||||
local yaml_path="${1:-$AGENT_SESSIONS_YAML}"
|
||||
YAML_PATH="$yaml_path" python3 - <<'PYEOF'
|
||||
import os, sys
|
||||
import yaml
|
||||
path = os.environ['YAML_PATH']
|
||||
try:
|
||||
with open(path) as f:
|
||||
d = yaml.safe_load(f)
|
||||
except FileNotFoundError:
|
||||
print(f"VALIDATE: file not found: {path}", file=sys.stderr); sys.exit(1)
|
||||
except yaml.YAMLError as e:
|
||||
print(f"VALIDATE: YAML parse error: {e}", file=sys.stderr); sys.exit(1)
|
||||
d = d or {}
|
||||
if not isinstance(d, dict):
|
||||
print("VALIDATE: top-level is not a mapping", file=sys.stderr); sys.exit(1)
|
||||
sessions = d.get('tmux_sessions', [])
|
||||
if not isinstance(sessions, list):
|
||||
print("VALIDATE: tmux_sessions is not a list", file=sys.stderr); sys.exit(1)
|
||||
valid = {'running', 'terminated', 'archived'}
|
||||
for i, s in enumerate(sessions):
|
||||
if not isinstance(s, dict):
|
||||
print(f"VALIDATE: tmux_sessions[{i}] not a mapping", file=sys.stderr); sys.exit(1)
|
||||
for k in ('name', 'status'):
|
||||
if not s.get(k):
|
||||
print(f"VALIDATE: tmux_sessions[{i}] missing '{k}'", file=sys.stderr); sys.exit(1)
|
||||
if s['status'] not in valid:
|
||||
print(f"VALIDATE: tmux_sessions[{i}] {s.get('name')!r} bad status {s['status']!r}",
|
||||
file=sys.stderr); sys.exit(1)
|
||||
if not isinstance(s.get('pane'), dict):
|
||||
print(f"VALIDATE: tmux_sessions[{i}] {s.get('name')!r} missing pane", file=sys.stderr); sys.exit(1)
|
||||
print(f"VALIDATE OK: {len(sessions)} session(s)")
|
||||
PYEOF
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# find_workspace_uuid <workspace> <agent>
|
||||
@@ -401,3 +360,39 @@ if ai.get('project_cwd') == ws:
|
||||
print('')
|
||||
PYEOF
|
||||
}
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# delegate_submit_job <prompt> <agent> <agent_session>
|
||||
#
|
||||
# Register a job in the delegate-job registry. Auto-detects the virtualenv python
|
||||
# and prints the new JID on stdout.
|
||||
# ---------------------------------------------------------------------------
|
||||
delegate_submit_job() {
  # Register a job in the delegate-job registry and forward whatever the
  # registry script prints on stdout.
  #   $1 prompt   $2 agent   $3 agent_session
  # Returns the registry script's exit status, or 1 when the script cannot
  # be located at all.
  local prompt="${1:-}" agent="${2:-}" session="${3:-}"
  local skill_dir
  skill_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

  # Prefer the nearest virtualenv interpreter above this file; else python3.
  local py_bin="python3"
  local d="$skill_dir"
  while [ "$d" != "/" ] && [ -n "$d" ]; do
    if [ -x "$d/.venv/bin/python" ]; then
      py_bin="$d/.venv/bin/python"
      break
    fi
    d="$(dirname "$d")"
  done

  local registry_py="$skill_dir/delegate-job/scripts/registry.py"
  if [ ! -f "$registry_py" ]; then
    # First look for the delegate-job registry specifically, so an unrelated
    # registry.py elsewhere in the tree is not picked up by accident.
    registry_py="$(find "$skill_dir" -type f -path '*/delegate-job/scripts/registry.py' 2>/dev/null | head -n 1 || true)"
  fi
  if [ -z "$registry_py" ] || [ ! -f "$registry_py" ]; then
    # Legacy behaviour: accept any registry.py below skill_dir.
    registry_py="$(find "$skill_dir" -type f -name "registry.py" 2>/dev/null | head -n 1 || true)"
  fi
  if [ -z "$registry_py" ] || [ ! -f "$registry_py" ]; then
    # NOTE(review): machine-specific last-resort path, kept for compatibility.
    registry_py="/home/godopu16/PuKi/laa/canary_projects/advanced_multi_agent/skills/delegate-job/scripts/registry.py"
  fi
  if [ ! -f "$registry_py" ]; then
    echo "ERROR: delegate-job registry.py not found (searched under $skill_dir)" >&2
    return 1
  fi

  "$py_bin" "$registry_py" register \
    --prompt "$prompt" \
    --agent "$agent" \
    --agent-session "$session"
}
|
||||
|
||||
|
||||
@@ -76,6 +76,11 @@ To prevent this, you can run this skill inside an **isolated tmux server** using
|
||||
```bash
|
||||
bash scripts/create_session.sh --workspace /path/to/project --agent claude --tmux-server multi-agent-canary
|
||||
```
|
||||
3. **Submit Job Integration**:
|
||||
You can automatically register a delegated job with a prompt when creating a session:
|
||||
```bash
|
||||
bash scripts/create_session.sh --workspace /path/to/project --agent claude --submit-job "Task prompt here"
|
||||
```
|
||||
|
||||
### Recommended Alias
|
||||
You can set an alias in your shell to easily query sessions on the isolated server:
|
||||
|
||||
@@ -32,6 +32,7 @@ Options:
|
||||
--wrapper force use of ~/.local/bin/<session> wrapper even if not present
|
||||
--dry-run print commands without executing
|
||||
--tmux-server NAME specify isolated tmux server name
|
||||
--submit-job PROMPT submit a job to delegate-job registry with the given prompt
|
||||
-h, --help this help
|
||||
EOF
|
||||
}
|
||||
@@ -42,6 +43,7 @@ SESSION_NAME=""
|
||||
USE_WRAPPER=0
|
||||
DRY_RUN=0
|
||||
TMUX_SERVER_OPT=""
|
||||
SUBMIT_JOB_PROMPT=""
|
||||
|
||||
while [ $# -gt 0 ]; do
|
||||
case "$1" in
|
||||
@@ -51,6 +53,7 @@ while [ $# -gt 0 ]; do
|
||||
--wrapper) USE_WRAPPER=1; shift ;;
|
||||
--dry-run) DRY_RUN=1; shift ;;
|
||||
--tmux-server) TMUX_SERVER_OPT="$2"; shift 2 ;;
|
||||
--submit-job) SUBMIT_JOB_PROMPT="$2"; shift 2 ;;
|
||||
-h|--help) usage; exit 0 ;;
|
||||
*) echo "ERROR: unknown arg: $1" >&2; usage; exit 2 ;;
|
||||
esac
|
||||
@@ -154,6 +157,19 @@ case "$AGENT" in
|
||||
esac
|
||||
|
||||
# agent-sessions.yaml 에 append
|
||||
DELEGATE_JOB_ID=""
|
||||
if [ -n "$SUBMIT_JOB_PROMPT" ]; then
|
||||
delegate_agent=""
|
||||
if [ "$AGENT" = "claude" ]; then
|
||||
delegate_agent="claude-code"
|
||||
else
|
||||
delegate_agent="antigravity-cli"
|
||||
fi
|
||||
agent_session="tmux:$SESSION_NAME"
|
||||
DELEGATE_JOB_ID=$(delegate_submit_job "$SUBMIT_JOB_PROMPT" "$delegate_agent" "$agent_session")
|
||||
echo "Submitted delegated job: $DELEGATE_JOB_ID"
|
||||
fi
|
||||
|
||||
if [ ! -f "$AGENT_SESSIONS_YAML" ]; then
|
||||
echo "ERROR: $AGENT_SESSIONS_YAML not found. Run init first." >&2
|
||||
exit 4
|
||||
@@ -172,7 +188,8 @@ atomic_dump_yaml "$AGENT_SESSIONS_YAML" \
|
||||
SESSION_NAME="$SESSION_NAME" AGENT="$AGENT" NOW_ISO="$NOW_ISO" \
|
||||
TMUX_EPOCH="$TMUX_EPOCH" PANE_PID="$PANE_PID" PANE_CWD="$PANE_CWD" \
|
||||
CMD_FULL="$CMD_FULL" START_CMD="$START_CMD" CHILD_PID="$CHILD_PID" \
|
||||
TMUX_SERVER_NAME="${TMUX_SERVER_NAME:-default}" <<'PYEOF'
|
||||
TMUX_SERVER_NAME="${TMUX_SERVER_NAME:-default}" \
|
||||
DELEGATE_JOB_ID="$DELEGATE_JOB_ID" <<'PYEOF'
|
||||
name = os.environ['SESSION_NAME']
|
||||
agent = os.environ['AGENT']
|
||||
pid = os.environ.get('PANE_PID', '')
|
||||
@@ -196,6 +213,7 @@ entry = {
|
||||
'tmux_session_created_at': os.environ['NOW_ISO'],
|
||||
'tmux_session_epoch': int(epoch) if epoch.isdigit() else 0,
|
||||
'tmux_server': server_name,
|
||||
'delegate_job_id': os.environ.get('DELEGATE_JOB_ID', '') or None,
|
||||
'pane': {
|
||||
'index': 0,
|
||||
'pid': int(pid) if pid.isdigit() else 0,
|
||||
@@ -242,6 +260,20 @@ PYEOF
|
||||
echo
|
||||
echo "=== created ==="
|
||||
echo "tmux session: $SESSION_NAME (pane pid $PANE_PID, cmd $PANE_CMD, cwd $PANE_CWD)"
|
||||
if [ -n "$DELEGATE_JOB_ID" ]; then
|
||||
echo "delegate job: $DELEGATE_JOB_ID"
|
||||
py_bin="python3"
|
||||
d_dir="$(dirname "${BASH_SOURCE[0]}")"
|
||||
while [ "$d_dir" != "/" ] && [ -n "$d_dir" ]; do
|
||||
if [ -x "$d_dir/.venv/bin/python" ]; then
|
||||
py_bin="$d_dir/.venv/bin/python"
|
||||
break
|
||||
fi
|
||||
d_dir="$(dirname "$d_dir")"
|
||||
done
|
||||
pub_script="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/delegate-job/scripts/publish_event.py"
|
||||
"$py_bin" "$pub_script" --job "$DELEGATE_JOB_ID" --event started --detail "canary session created" || true
|
||||
fi
|
||||
echo "agent-sessions.yaml updated"
|
||||
echo
|
||||
if [ -n "${TMUX_SERVER_NAME:-}" ] && [ "$TMUX_SERVER_NAME" != "default" ]; then
|
||||
|
||||
@@ -80,10 +80,13 @@ bash ~/PuKi/lab/agent_sessions/skills/multi-agent-delete/scripts/delete_session.
|
||||
|
||||
The script:
|
||||
1. Verifies the session is in agent-sessions.yaml
|
||||
2. Captures the `last_visible_status` from `tmux capture-pane` (so we have a final TUI snapshot for audit)
|
||||
3. For `hard` mode: `tmux kill-session -t <name>` (which auto-SIGTERMs children including the agent)
|
||||
4. For `purge-conversation`: deletes `~/.claude/projects/.../jsonl` (claude) or `~/.gemini/antigravity-cli/conversations/...db` + `brain/...` (agy)
|
||||
5. Updates the YAML entry:
|
||||
2. If `delegate_job_id` is set, automatically publishes a `progress --detail "terminating"` event to the delegate-job registry
|
||||
3. Captures the `last_visible_status` from `tmux capture-pane` (so we have a final TUI snapshot for audit)
|
||||
4. For `hard` mode: `tmux kill-session -t <name>` (which auto-SIGTERMs children including the agent)
|
||||
5. For `purge-conversation`: deletes `~/.claude/projects/.../jsonl` (claude) or `~/.gemini/antigravity-cli/conversations/...db` + `brain/...` (agy)
|
||||
6. Updates the YAML entry
|
||||
7. If `delegate_job_id` is set, publishes a `completed` event to the delegate-job registry
|
||||
8. The YAML entry written in step 6 then looks like:
|
||||
```yaml
|
||||
- name: <SESSION_NAME>
|
||||
status: terminated
|
||||
|
||||
@@ -61,15 +61,17 @@ if [ -z "$AGENT" ]; then
|
||||
esac
|
||||
fi
|
||||
|
||||
# 세션이 YAML 에 있는지 + 해당 row 의 워크스페이스 cwd 추출
|
||||
TARGET_CWD=$(env_python "$AGENT_SESSIONS_YAML" SESSION_NAME="$SESSION_NAME" <<'PYEOF'
|
||||
# 세션이 YAML 에 있는지 + 해당 row 의 워크스페이스 cwd 및 delegate_job_id 추출
|
||||
MAPPED_DATA=$(env_python "$AGENT_SESSIONS_YAML" SESSION_NAME="$SESSION_NAME" <<'PYEOF'
|
||||
import os, yaml
|
||||
name = os.environ['SESSION_NAME']
|
||||
with open(os.environ['YAML_PATH']) as f:
|
||||
d = yaml.safe_load(f) or {}
|
||||
for s in d.get('tmux_sessions', []):
|
||||
if s.get('name') == name:
|
||||
print((s.get('pane') or {}).get('cwd', ''))
|
||||
cwd = (s.get('pane') or {}).get('cwd', '')
|
||||
jid = s.get('delegate_job_id', '') or ''
|
||||
print(f"{cwd}|{jid}")
|
||||
raise SystemExit(0)
|
||||
raise SystemExit(7)
|
||||
PYEOF
|
||||
@@ -78,6 +80,9 @@ PYEOF
|
||||
exit 1
|
||||
}
|
||||
|
||||
TARGET_CWD="${MAPPED_DATA%|*}"
|
||||
DELEGATE_JOB_ID="${MAPPED_DATA#*|}"
|
||||
|
||||
# purge 확인
|
||||
if [ "$PURGE" = "1" ] && [ "$YES" != "1" ]; then
|
||||
echo "DANGER: --purge-conversation will DELETE this workspace's on-disk conversation."
|
||||
@@ -104,6 +109,20 @@ if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
|
||||
LAST_STATUS=$(tmux capture-pane -t "$SESSION_NAME" -p -S -10 2>/dev/null | tr '\n' ' ' | head -c 500 || true)
|
||||
fi
|
||||
|
||||
if [ -n "$DELEGATE_JOB_ID" ]; then
|
||||
py_bin="python3"
|
||||
d_dir="$(dirname "${BASH_SOURCE[0]}")"
|
||||
while [ "$d_dir" != "/" ] && [ -n "$d_dir" ]; do
|
||||
if [ -x "$d_dir/.venv/bin/python" ]; then
|
||||
py_bin="$d_dir/.venv/bin/python"
|
||||
break
|
||||
fi
|
||||
d_dir="$(dirname "$d_dir")"
|
||||
done
|
||||
pub_script="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/delegate-job/scripts/publish_event.py"
|
||||
"$py_bin" "$pub_script" --job "$DELEGATE_JOB_ID" --event progress --detail "terminating" || true
|
||||
fi
|
||||
|
||||
# hard 모드면 tmux 죽임
|
||||
if [ "$MODE" = "hard" ] && [ "$TMUX_ALIVE" = "1" ]; then
|
||||
tmux kill-session -t "$SESSION_NAME"
|
||||
@@ -187,6 +206,20 @@ elif purge and not purge_uuid:
|
||||
print(f"updated: {name} status={target['status']}", flush=True)
|
||||
PYEOF
|
||||
|
||||
if [ -n "$DELEGATE_JOB_ID" ]; then
|
||||
py_bin="python3"
|
||||
d_dir="$(dirname "${BASH_SOURCE[0]}")"
|
||||
while [ "$d_dir" != "/" ] && [ -n "$d_dir" ]; do
|
||||
if [ -x "$d_dir/.venv/bin/python" ]; then
|
||||
py_bin="$d_dir/.venv/bin/python"
|
||||
break
|
||||
fi
|
||||
d_dir="$(dirname "$d_dir")"
|
||||
done
|
||||
pub_script="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/delegate-job/scripts/publish_event.py"
|
||||
"$py_bin" "$pub_script" --job "$DELEGATE_JOB_ID" --event completed --detail "session terminated" || true
|
||||
fi
|
||||
|
||||
echo
|
||||
echo "=== delete complete ==="
|
||||
echo " session: $SESSION_NAME"
|
||||
|
||||
@@ -87,6 +87,7 @@ case "$AGENT" in
|
||||
esac
|
||||
|
||||
# 4. Update agent-sessions.yaml: status running, last_visible_status
|
||||
# (Also automatically publishes a `progress --detail "resumed"` event to the delegate-job registry if a delegate_job_id exists)
|
||||
bash ~/PuKi/lab/agent_sessions/skills/multi-agent-resume/scripts/update_yaml_resumed.sh \
|
||||
--session "$SESSION_NAME" --uuid "$UUID"
|
||||
|
||||
|
||||
@@ -55,6 +55,19 @@ if [ "$AGENT" = "agy" ] && [ -n "$PANE_PID" ]; then
|
||||
CHILD_PID="${CHILD_PID:-0}"
|
||||
fi
|
||||
|
||||
DELEGATE_JOB_ID=$(env_python "$AGENT_SESSIONS_YAML" SESSION_NAME="$SESSION_NAME" <<'PYEOF'
|
||||
import os, yaml
|
||||
name = os.environ['SESSION_NAME']
|
||||
with open(os.environ['YAML_PATH']) as f:
|
||||
d = yaml.safe_load(f) or {}
|
||||
for s in d.get('tmux_sessions', []):
|
||||
if s.get('name') == name:
|
||||
print(s.get('delegate_job_id', '') or '')
|
||||
raise SystemExit(0)
|
||||
raise SystemExit(0)
|
||||
PYEOF
|
||||
)
|
||||
|
||||
atomic_dump_yaml "$AGENT_SESSIONS_YAML" \
|
||||
SESSION_NAME="$SESSION_NAME" UUID="$UUID" AGENT="$AGENT" NOW_ISO="$NOW_ISO" \
|
||||
PANE_PID="$PANE_PID" CHILD_PID="$CHILD_PID" <<'PYEOF'
|
||||
@@ -104,3 +117,17 @@ snap.pop('terminated_at_epoch', None)
|
||||
|
||||
print(f"updated: {name} status=running (resume id -> per-row own id)", flush=True)
|
||||
PYEOF
|
||||
|
||||
if [ -n "$DELEGATE_JOB_ID" ]; then
|
||||
py_bin="python3"
|
||||
d_dir="$(dirname "${BASH_SOURCE[0]}")"
|
||||
while [ "$d_dir" != "/" ] && [ -n "$d_dir" ]; do
|
||||
if [ -x "$d_dir/.venv/bin/python" ]; then
|
||||
py_bin="$d_dir/.venv/bin/python"
|
||||
break
|
||||
fi
|
||||
d_dir="$(dirname "$d_dir")"
|
||||
done
|
||||
pub_script="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/delegate-job/scripts/publish_event.py"
|
||||
"$py_bin" "$pub_script" --job "$DELEGATE_JOB_ID" --event progress --detail "resumed" || true
|
||||
fi
|
||||
|
||||
@@ -55,24 +55,14 @@ The script:
|
||||
## Output format (default = aligned table)
|
||||
|
||||
```
|
||||
AGENT SESSIONS STATUS
|
||||
yaml_path: ~/PuKi/lab/agent_sessions/agent-sessions.yaml
|
||||
tmux_sessions_alive: 2
|
||||
yaml_entries: 3
|
||||
unregistered: 0
|
||||
drifts: 0
|
||||
|
||||
NAME | YAML | TMUX | PANE CMD | PANE CWD | RESUME UUID ON DISK
|
||||
--------------------------------------------------|----------|-------|-------------------|---------------------------------------------------|--------------------
|
||||
lab-landing-page-creator-claude | running | ✓ | claude | /home/.../refer_landing_page | 87dc548e-... ✓
|
||||
lab-landing-page-creator-agy | terminated| ✗ | - | - | 22255a9a-... ✓ (orphan)
|
||||
lab-paper-pdf2md-creator-claude | running | ✓ | claude | /home/.../paper-pdf2md | -
|
||||
|
||||
DRIFTS
|
||||
(none)
|
||||
|
||||
UNREGISTERED TMUX SESSIONS
|
||||
(none)
|
||||
agent-sessions status — 2026-06-19T14:20:00Z (tmux_confirmed=True)
|
||||
========================================================================================================================================
|
||||
NAME SERVER YAML TMUX CMD RESUME JOB_ID JOB_STATUS DRIFT
|
||||
----------------------------------------------------------------------------------------------------------------------------------------
|
||||
lab-landing-page-creator-claude default running alive claude yes - - -
|
||||
lab-landing-page-creator-agy default terminated dead agy yes 5fe09ba8 completed -
|
||||
lab-paper-pdf2md-creator-claude default running alive claude scan - - -
|
||||
========================================================================================================================================
|
||||
```
|
||||
|
||||
## Output format (`--json`)
|
||||
|
||||
@@ -60,11 +60,49 @@ def resume_on_disk(s):
|
||||
return '?'
|
||||
|
||||
|
||||
def get_job_status(s):
    """Return ``(job_id, job_status)`` for a session row, for table display.

    Rows without a ``delegate_job_id`` yield ``('-', '-')``. Otherwise the job
    record is looked up, in order, in:

    1. ``.hermes/jobs/<jid>.json`` relative to the current directory,
    2. the same file under the fixed registry root,
    3. ``.hermes/delegate_job_logs/<jid>/status.json`` under that root.

    The first readable JSON object wins. A missing, empty or ``null`` status
    is reported as ``'unknown'`` so that the result can always be used with
    string format specs.

    Args:
        s: Session mapping loaded from the sessions YAML.

    Returns:
        Tuple ``(jid, status)`` where ``status`` is always a ``str``.
    """
    jid = s.get('delegate_job_id')
    if not jid:
        return ('-', '-')

    # NOTE(review): hard-coded, machine-specific registry root; confirm whether
    # this should come from configuration instead.
    registry_root = '/home/godopu16/PuKi/laa/canary_projects/advanced_multi_agent'
    jid_text = str(jid)  # YAML may have parsed a purely numeric id as int
    candidates = (
        os.path.join('.hermes', 'jobs', f"{jid_text}.json"),
        os.path.join(registry_root, '.hermes', 'jobs', f"{jid_text}.json"),
        os.path.join(registry_root, '.hermes', 'delegate_job_logs', jid_text, 'status.json'),
    )

    for path in candidates:
        try:
            with open(path) as jf:
                job_data = json.load(jf)
        except (OSError, ValueError):
            # Missing, unreadable or malformed file: try the next location.
            continue
        if not isinstance(job_data, dict):
            continue
        status = job_data.get('status')
        return (jid, str(status) if status else 'unknown')

    return (jid, 'unknown')
|
||||
|
||||
|
||||
sessions = d.get('tmux_sessions', [])
|
||||
print(f"agent-sessions status — {drift['timestamp']} (tmux_confirmed={drift['tmux_confirmed']})")
|
||||
print("=" * 116)
|
||||
print(f"{'NAME':<44} {'SERVER':<12} {'YAML':<10} {'TMUX':<6} {'CMD':<6} {'RESUME':<8} DRIFT")
|
||||
print("-" * 116)
|
||||
print("=" * 136)
|
||||
print(f"{'NAME':<44} {'SERVER':<12} {'YAML':<10} {'TMUX':<6} {'CMD':<6} {'RESUME':<8} {'JOB_ID':<10} {'JOB_STATUS':<12} DRIFT")
|
||||
print("-" * 136)
|
||||
if not sessions:
|
||||
print("(no sessions registered)")
|
||||
for s in sessions:
|
||||
@@ -74,15 +112,16 @@ for s in sessions:
|
||||
tmux = 'alive' if f"{name}|{server}" in alive else 'dead'
|
||||
cmd = (s.get('pane') or {}).get('cmd', '?')
|
||||
res = resume_on_disk(s)
|
||||
jid, jstatus = get_job_status(s)
|
||||
drs = ','.join(drift_by_name.get(name, [])) or '-'
|
||||
print(f"{name:<44} {server:<12} {status:<10} {tmux:<6} {cmd:<6} {res:<8} {drs}")
|
||||
print(f"{name:<44} {server:<12} {status:<10} {tmux:<6} {cmd:<6} {res:<8} {jid:<10} {jstatus:<12} {drs}")
|
||||
# drifts not tied to a registered row (e.g. class B unregistered, class D cache)
|
||||
known = {s.get('name') for s in sessions}
|
||||
extra = [dr for dr in drift.get('drifts', []) if dr['name'] not in known]
|
||||
if extra:
|
||||
print("-" * 116)
|
||||
print("-" * 136)
|
||||
for dr in extra:
|
||||
print(f" [{dr['class']}] {dr['msg']}")
|
||||
print("=" * 116)
|
||||
print("=" * 136)
|
||||
print(f"alive tmux: {sorted(alive)}")
|
||||
PYEOF
|
||||
|
||||
Reference in New Issue
Block a user