diff --git a/skills/lib.sh b/skills/lib.sh
index bf1d011..76772e9 100644
--- a/skills/lib.sh
+++ b/skills/lib.sh
@@ -214,7 +214,7 @@ def _validate(d):
     sessions = d.get('tmux_sessions', [])
     if not isinstance(sessions, list):
         raise SystemExit("VALIDATE: tmux_sessions is not a list")
-    valid = {'running', 'terminated', 'archived'}
+    valid = {'running', 'terminated', 'archived', 'stopped'}
     for i, s in enumerate(sessions):
         if not isinstance(s, dict):
             raise SystemExit(f"VALIDATE: tmux_sessions[{i}] not a mapping")
@@ -370,6 +370,46 @@ print('')
 PYEOF
 }
 
+# ---------------------------------------------------------------------------
+# capture_conversation_id <agent> <workdir>
+#
+# Thin wrapper over find_workspace_uuid: resolves THIS workspace's conversation
+# id (claude jsonl sessionId / agy db uuid) and prints it on stdout (empty line
+# if none). find_workspace_uuid is already a workspace-scoped, 3-tier, race-free
+# resolver (per-row own id -> workspace-scoped disk scan -> cwd-matched cache),
+# so recording its result into the row before kill guarantees tier-1 on the next
+# resume. Always exits 0 (best-effort: a resolver failure is swallowed here).
+# ---------------------------------------------------------------------------
+capture_conversation_id() {
+  local agent="${1:-}" workdir="${2:-}"  # tolerate missing args under `set -u`
+  find_workspace_uuid "$workdir" "$agent" || true  # enforce the exit-0 contract for `set -e` callers
+}
+
+# ---------------------------------------------------------------------------
+# is_already_stopped
+#
+# Exits 0 if the row's status is 'stopped' (printing "stopped_at=<timestamp>" on
+# stdout), 1 otherwise (including not-found). Used for idempotency: a second
+# stop on an already-stopped session is a no-op.
+# ---------------------------------------------------------------------------
+is_already_stopped() {
+  local session_name="$1"
+  SESSION_NAME="$session_name" env_python "$AGENT_SESSIONS_YAML" <<'PYEOF'
+import os, yaml
+name = os.environ['SESSION_NAME']
+yaml_path = os.environ['YAML_PATH']  # NOTE(review): presumably exported by env_python — confirm
+d = {}
+if os.path.exists(yaml_path):
+    with open(yaml_path) as f:
+        d = yaml.safe_load(f) or {}
+for s in d.get('tmux_sessions') or []:  # `tmux_sessions: null` must read as "no rows", not raise
+    if isinstance(s, dict) and s.get('name') == name and s.get('status') == 'stopped':
+        print(f"stopped_at={s.get('stopped_at', '?')}")
+        raise SystemExit(0)
+raise SystemExit(1)
+PYEOF
+}
+
 # ---------------------------------------------------------------------------
 # tmux-agent-orchestrate-delegate-job integration helpers
 #
diff --git a/skills/tmux-agent-orchestrate-delete/SKILL.md b/skills/tmux-agent-orchestrate-delete/SKILL.md
index cb1eb83..55b20da 100644
--- a/skills/tmux-agent-orchestrate-delete/SKILL.md
+++ b/skills/tmux-agent-orchestrate-delete/SKILL.md
@@ -78,6 +78,45 @@ bash skills/tmux-agent-orchestrate-delete/scripts/delete_session.sh \
   --session "$SESSION_NAME" --mode hard --purge-conversation
 ```
 
+## Stop extension (Option A — `stop` semantics without a 6th skill)
+
+Rather than a separate `tmux-agent-orchestrate-stop` route, `delete` absorbs the
+"stop" intent via three opt-in options. Passing **any** of them switches the YAML
+transition from `terminated` to **`stopped`** (`running → stopped`), signalling
+"deliberately stopped, conversation preserved, ready to resume":
+
+```bash
+# Stop: capture the conversation id into the row, record a reason, exit gracefully.
+bash skills/tmux-agent-orchestrate-delete/scripts/delete_session.sh \
+  --session "$SESSION_NAME" --capture-id --reason api_error --graceful
+```
+
+| Option | Effect |
+|---|---|
+| `--capture-id` | Before kill, resolve THIS workspace's conversation id via `find_workspace_uuid` (per-row → workspace-scoped disk scan → cache) and record it to `claude_session_id_own` / `agy_conversation_id_own`, plus `resumable: true`. Guarantees the next resume hits **tier-1** (race-free) instead of the mtime-based disk-scan fallback. |
+| `--reason <R>` | Records `stop_reason` (default `manual_stop`). Convention: `user_request` / `api_error` / `timeout` / `crash` / `manual_stop`. |
+| `--graceful` | `tmux send-keys` exit (`/exit` for claude, `Exit` for agy) → 3 s wait → if alive `tmux kill-session` (SIGTERM) → 5 s → `kill -9` pane pid as last resort. Avoids hard-killing a TUI mid-write. |
+
+**Idempotency**: in STOP mode, if the row is already `status: stopped`, the script
+prints `already stopped (...)` and exits 0 — re-running is a safe no-op.
+
+**Backward compatibility**: with none of these options, `delete` behaves exactly as
+before (`hard`→`terminated`, `soft`→`archived`).
+
+### State machine
+
+```
+running ──(delete --mode hard)─────────────────────► terminated
+running ──(delete --capture-id/--reason/--graceful)► stopped (resumable, conv preserved)
+running ──(delete --mode soft)─────────────────────► archived (tmux left alive)
+stopped ──(delete --capture-id … again)────────────► stopped (idempotent no-op)
+any     ──(delete --purge-conversation --yes)──────► (conv deleted, resumable:false)
+```
+
+Fields written in STOP mode: `status: stopped`, `stopped_at`, `stopped_at_epoch`,
+`stop_reason`, `termination_mode: stop|graceful`, and (with `--capture-id`)
+`claude_session_id_own`/`agy_conversation_id_own` + `resumable: true`.
+
 The script:
 1. Verifies the session is in agent-sessions.yaml
 2. 
If `delegate_job_id` is set, automatically publishes a `progress --detail "terminating"` event to the tmux-agent-orchestrate-delegate-job registry
diff --git a/skills/tmux-agent-orchestrate-delete/scripts/delete_session.sh b/skills/tmux-agent-orchestrate-delete/scripts/delete_session.sh
index ca9c2f9..38473be 100755
--- a/skills/tmux-agent-orchestrate-delete/scripts/delete_session.sh
+++ b/skills/tmux-agent-orchestrate-delete/scripts/delete_session.sh
@@ -12,9 +12,21 @@
 #                    격리된* conversation artifact 만 삭제 (P0-C). 전역
 #                    agent_identities 를 참조하지 않음. resume 불가.
 #
+# Stop extension (Option A — extends delete; absorbs stop semantics without a new 6th skill):
+#   --capture-id — just before the kill, record this workspace's conversation id
+#                  into the row (claude_session_id_own / agy_conversation_id_own) →
+#                  the next resume restores via tier-1 (race-free). Reuses
+#                  find_workspace_uuid (per-row -> workspace-scoped disk scan -> cache).
+#   --reason R   — reason for the state transition (stop_reason). Default: manual_stop.
+#   --graceful   — instead of an immediate kill-session, request a clean exit via send-keys →
+#                  wait 3s → if still running kill-session(SIGTERM) → 5s → SIGKILL.
+# Passing any of the three options above enters STOP mode: status transitions to stopped
+# instead of terminated (running -> stopped). Idempotent: already stopped → no-op + exit 0.
+# With none of these options the existing hard/soft behavior is unchanged (backward compatible).
+#
 # Exit codes:
-#   0 = success | 1 = YAML not found / not registered | 2 = invalid args
-#   3 = interactive confirmation required (--yes 누락)
+#   0 = success (or already-stopped no-op) | 1 = YAML not found / not registered
+#   2 = invalid args | 3 = interactive confirmation required (--yes missing)
 
 set -euo pipefail
 source "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)/lib.sh"
@@ -22,10 +34,17 @@ source "$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)/lib.sh"
 usage() {
   cat <<EOF
 Usage: $0 --session <name> [--agent claude|agy] [--mode soft|hard] [--purge-conversation] [--yes]
+          [--capture-id] [--reason <R>] [--graceful]
 
 Modes:
   soft            — update YAML to status=archived, leave tmux running
   hard (default)  — tmux kill-session + update YAML to status=terminated
+
+Stop extension (any of these → STOP mode, status=stopped instead of terminated; not valid with --mode soft):
+  --capture-id  — record this workspace's conversation id to the row before kill
+  --reason <R>  — stop_reason field (default: manual_stop)
+  --graceful    — send-keys exit → 3s → kill-session → 5s → SIGKILL fallback
+  (idempotent: stopping an already-stopped session is a no-op with exit 0)
 EOF
 }
 
@@ -34,6 +53,10 @@ AGENT=""
 MODE="hard"   # "delete" 의 자연스러운 의미 = tmux 까지 종료
 PURGE=0
 YES=0
+CAPTURE_ID=0
+GRACEFUL=0
+REASON=""
+STOP_MODE=0
 
 while [ $# -gt 0 ]; do
   case "$1" in
@@ -42,6 +65,9 @@ while [ $# -gt 0 ]; do
     --mode) MODE="$2"; shift 2 ;;
     --purge-conversation) PURGE=1; shift ;;
     --yes) YES=1; shift ;;
+    --capture-id) CAPTURE_ID=1; STOP_MODE=1; shift ;;
+    --reason) [ $# -ge 2 ] || { echo "ERROR: --reason requires a value" >&2; usage; exit 2; }; REASON="$2"; STOP_MODE=1; shift 2 ;;
+    --graceful) GRACEFUL=1; STOP_MODE=1; shift ;;
     -h|--help) usage; exit 0 ;;
     *) echo "ERROR: unknown arg: $1" >&2; usage; exit 2 ;;
   esac
@@ -50,6 +76,11 @@ done
 [ "$MODE" = "soft" ] || [ "$MODE" = "hard" ] || { echo "ERROR: --mode must be soft or hard" >&2; exit 2; }
 [ -f "$AGENT_SESSIONS_YAML" ] || { echo "ERROR: $AGENT_SESSIONS_YAML not found" >&2; exit 1; }
 
+# STOP mode contradicts --mode soft (soft records status=archived and leaves tmux running).
+if [ "$STOP_MODE" = "1" ] && [ "$MODE" = "soft" ]; then echo "ERROR: --capture-id/--reason/--graceful cannot be combined with --mode soft" >&2; exit 2; fi
+# STOP mode: default reason when --reason was not given.
+if [ "$STOP_MODE" = "1" ] && [ -z "$REASON" ]; then REASON="manual_stop"; fi
+
 export TMUX_SERVER_NAME="$(resolve_tmux_server "$SESSION_NAME")"
 
 # --agent 미지정 시 이름 suffix 로 fallback (P1-F)
@@ -84,6 +115,14 @@ PYEOF
 TARGET_CWD=$(printf '%s' "$MAPPED_DATA" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("cwd",""))')
 DELEGATE_JOB_ID=$(printf '%s' "$MAPPED_DATA" | python3 -c 'import sys,json; print(json.load(sys.stdin).get("job_id",""))')
 
+# Idempotency: in STOP mode, an already-stopped session is a no-op + exit 0.
+if [ "$STOP_MODE" = "1" ]; then
+  if 
STOPPED_INFO=$(is_already_stopped "$SESSION_NAME"); then
+    echo "already stopped (status=stopped, $STOPPED_INFO) — no-op"
+    exit 0
+  fi
+fi
+
 # purge 확인
 if [ "$PURGE" = "1" ] && [ "$YES" != "1" ]; then
   echo "DANGER: --purge-conversation will DELETE this workspace's on-disk conversation."
@@ -110,10 +149,52 @@ if tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
   LAST_STATUS=$(tmux capture-pane -t "$SESSION_NAME" -p -S -10 2>/dev/null | tr '\n' ' ' | head -c 500 || true)
 fi
 
+# --capture-id: resolve the conversation id just before the kill (while the process/jsonl
+# is still alive). find_workspace_uuid tries tier-1(row) -> tier-2(workspace-scoped disk scan)
+# -> tier-3(cache) on its own, so this works whether or not tmux is alive.
+CAPTURED_UUID=""
+if [ "$CAPTURE_ID" = "1" ] && [ -n "$TARGET_CWD" ]; then
+  CAPTURED_UUID=$(capture_conversation_id "$AGENT" "$TARGET_CWD" || true)
+  if [ -n "$CAPTURED_UUID" ]; then
+    echo "captured conversation id: $CAPTURED_UUID"
+  else
+    echo "WARN: --capture-id requested but no conversation id resolved (nothing on disk yet)"
+  fi
+fi
+
 delegate_publish_event "$DELEGATE_JOB_ID" progress "terminating"
 
-# hard 모드면 tmux 죽임
-if [ "$MODE" = "hard" ] && [ "$TMUX_ALIVE" = "1" ]; then
+# --graceful: request a clean exit via send-keys, then fall back to kill-session, then SIGKILL.
+graceful_stop() {
+  local pane_pid exitkey
+  pane_pid=$(tmux list-panes -t "$SESSION_NAME" -F '#{pane_pid}' 2>/dev/null | head -1 || true)  # remembered now for the last-resort kill
+  case "$AGENT" in
+    claude) exitkey="/exit" ;;
+    agy) exitkey="Exit" ;;
+    *) exitkey="/exit" ;;
+  esac
+  echo "graceful: send-keys '$exitkey' to $SESSION_NAME"
+  tmux send-keys -t "$SESSION_NAME" "$exitkey" Enter 2>/dev/null || true
+  sleep 3
+  if ! tmux has-session -t "$SESSION_NAME" 2>/dev/null; then
+    echo "graceful: exited cleanly"
+    return 0
+  fi
+  echo "graceful: still alive → kill-session (SIGTERM)"
+  tmux kill-session -t "$SESSION_NAME" 2>/dev/null || true
+  sleep 5
+  if ! 
{ tmux has-session -t "$SESSION_NAME" 2>/dev/null || { [ -n "$pane_pid" ] && kill -0 "$pane_pid" 2>/dev/null; }; }; then  # session gone AND pane process gone
+    echo "graceful: terminated after kill-session"
+    return 0
+  fi
+  echo "graceful: STILL alive → SIGKILL fallback (pane pid $pane_pid)"
+  [ -n "$pane_pid" ] && kill -9 "$pane_pid" 2>/dev/null || true
+}
+
+# Terminate tmux: fallback chain when --graceful, otherwise the existing hard kill.
+if [ "$GRACEFUL" = "1" ] && [ "$TMUX_ALIVE" = "1" ]; then
+  graceful_stop
+elif [ "$MODE" = "hard" ] && [ "$TMUX_ALIVE" = "1" ]; then
   tmux kill-session -t "$SESSION_NAME"
   echo "killed tmux: $SESSION_NAME"
 elif [ "$MODE" = "hard" ]; then
@@ -123,7 +204,9 @@ fi
 atomic_dump_yaml "$AGENT_SESSIONS_YAML" \
   SESSION_NAME="$SESSION_NAME" AGENT="$AGENT" MODE="$MODE" PURGE="$PURGE" \
   NOW_ISO="$NOW_ISO" NOW_EPOCH="$NOW_EPOCH" LAST_STATUS="$LAST_STATUS" \
-  PURGE_UUID="$PURGE_UUID" TARGET_CWD="$TARGET_CWD" <<'PYEOF'
+  PURGE_UUID="$PURGE_UUID" TARGET_CWD="$TARGET_CWD" \
+  STOP_MODE="$STOP_MODE" REASON="$REASON" GRACEFUL="$GRACEFUL" \
+  CAPTURED_UUID="$CAPTURED_UUID" <<'PYEOF'
 import shutil
 name = os.environ['SESSION_NAME']
 agent = os.environ['AGENT']
@@ -134,6 +217,10 @@ home = os.environ['HOME_DIR']
 last_status = os.environ.get('LAST_STATUS', '')
 purge_uuid = os.environ.get('PURGE_UUID', '').strip()
 ws = os.environ.get('TARGET_CWD', '')
+stop_mode = os.environ.get('STOP_MODE') == '1'
+graceful = os.environ.get('GRACEFUL') == '1'
+reason = os.environ.get('REASON', '') or 'manual_stop'
+captured = os.environ.get('CAPTURED_UUID', '').strip()
 
 target = None
 for s in d.get('tmux_sessions', []):
@@ -149,6 +236,13 @@ if mode == 'soft':
     target['status'] = 'archived'
     target['archived_at'] = now
     target['termination_mode'] = 'soft'
+elif stop_mode:
+    # STOP mode: running -> stopped (distinguishes intent from terminated). Conversation is preserved.
+    target['status'] = 'stopped'
+    target['stopped_at'] = now
+    target['stopped_at_epoch'] = int(os.environ['NOW_EPOCH'])
+    target['stop_reason'] = reason
+    target['termination_mode'] = 'graceful' if graceful else 'stop'
 else:
     target['status'] = 'terminated'
     target['terminated_at'] = now
@@ -158,6 +252,15 @@ else:
 if last_status:
     target['last_visible_status_at_termination'] = last_status
 
+# --capture-id: record the resolved conversation id into the per-row own id (guarantees tier-1).
+# When combined with purge it would be deleted below anyway, so it is not recorded.
+if captured and not purge:
+    if agent == 'claude':
+        target['claude_session_id_own'] = captured
+    elif agent == 'agy':
+        target['agy_conversation_id_own'] = captured
+    target['resumable'] = True
+
 # --purge-conversation: 워크스페이스 격리된 UUID 의 디스크 artifact 만 삭제 (P0-C)
 if purge and purge_uuid:
     if agent == 'claude':
@@ -193,16 +296,25 @@ if purge and purge_uuid:
 elif purge and not purge_uuid:
     print("WARN: --purge-conversation requested but no workspace-scoped UUID resolved; nothing purged", flush=True)
 
+if purge:
+    target['resumable'] = False
+
 print(f"updated: {name} status={target['status']}", flush=True)
 PYEOF
 
 delegate_publish_event "$DELEGATE_JOB_ID" completed "session terminated"
 
 echo
-echo "=== delete complete ==="
+if [ "$STOP_MODE" = "1" ]; then
+  echo "=== stop complete ==="
+else
+  echo "=== delete complete ==="
+fi
 echo " session: $SESSION_NAME"
 echo " agent: $AGENT"
-echo " mode: $MODE"
+echo " mode: $MODE$([ "$STOP_MODE" = "1" ] && echo ' (stop)' || true)$([ "$GRACEFUL" = "1" ] && echo ' +graceful' || true)"
+[ "$STOP_MODE" = "1" ] && echo " reason: $REASON"
+[ "$CAPTURE_ID" = "1" ] && echo " captured: ${CAPTURED_UUID:-<none>}"
 echo " purge: $PURGE${PURGE_UUID:+ (uuid $PURGE_UUID)}"
 echo " time: $NOW_ISO"
 echo